Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:33511 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 44805 invoked by uid 1010); 29 Nov 2007 19:21:09 -0000 Delivered-To: ezmlm-scan-internals@lists.php.net Delivered-To: ezmlm-internals@lists.php.net Received: (qmail 44790 invoked from network); 29 Nov 2007 19:21:09 -0000 Received: from unknown (HELO lists.php.net) (127.0.0.1) by localhost with SMTP; 29 Nov 2007 19:21:09 -0000 X-Host-Fingerprint: 216.145.49.21 snv-global3.corp.yahoo.com Received: from [216.145.49.21] ([216.145.49.21:27533] helo=localhost.localdomain) by pb1.pair.com (ecelerity 2.1.1.9-wez r(12769M)) with ESMTP id 28/76-32949-5211F474 for ; Thu, 29 Nov 2007 14:21:09 -0500 Message-ID: <28.76.32949.5211F474@pb1.pair.com> To: internals@lists.php.net Date: Thu, 29 Nov 2007 11:21:06 -0800 User-Agent: Thunderbird 2.0.0.9 (Windows/20071031) MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit X-Posted-By: 216.145.49.21 Subject: Proposed feature for json_encode() From: pollita@php.net (Sara Golemon) While it's technically "safe" to include user-supplied data in json_encode() serialized values, the fact that characters such as <>&' remain as-is means there is room for some as-yet unidentified problem, either in the browser's rendering or (more likely) elsewhere in one's codebase, for this data to get into the wrong context and be executed. To that end, the attached patch allows the caller to be paranoid about their data and stipulate that <>&' should be encoded to hex references instead. This doesn't stop a web developer from dropping that content into an innerHTML of course, but it's one more rope holding the ship together. 
Obviously, since this adds five characters per pedantically escaped character, it's not something you'd want on by default, so the normal behavior would be to leave them alone. echo json_encode("<foo>"); "<foo>" echo json_encode("<foo>", JSON_HEX_TAG); "\u003Cfoo\u003E" echo json_encode("<foo bar='baz'>", JSON_HEX_TAG | JSON_HEX_APOS); "\u003Cfoo bar=\u0027baz\u0027\u003E" If no one objects, I'll commit this in a week along with an MFH for 5.3 -Sara Index: json.c =================================================================== RCS file: /repository/pecl/json/json.c,v retrieving revision 1.31 diff -u -p -r1.31 json.c --- json.c 1 Oct 2007 15:25:01 -0000 1.31 +++ json.c 29 Nov 2007 19:01:34 -0000 @@ -32,6 +32,10 @@ static const char digits[] = "0123456789abcdef"; +#define PHP_JSON_HEX_TAG (1<<0) +#define PHP_JSON_HEX_AMP (1<<1) +#define PHP_JSON_HEX_APOS (1<<2) + /* {{{ json_functions[] * * Every user visible function must have an entry in json_functions[]. @@ -43,6 +47,18 @@ const function_entry json_functions[] = }; /* }}} */ +/* {{{ MINIT */ +static PHP_MINIT_FUNCTION(json) +{ + REGISTER_LONG_CONSTANT("JSON_HEX_TAG", PHP_JSON_HEX_TAG, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_HEX_AMP", PHP_JSON_HEX_AMP, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_HEX_APOS", PHP_JSON_HEX_APOS, CONST_CS | CONST_PERSISTENT); + + return SUCCESS; +} +/* }}} */ + + /* {{{ json_module_entry */ zend_module_entry json_module_entry = { @@ -51,7 +67,7 @@ zend_module_entry json_module_entry = { #endif "json", json_functions, - NULL, + PHP_MINIT(json), NULL, NULL, NULL, @@ -78,8 +94,8 @@ PHP_MINFO_FUNCTION(json) } /* }}} */ -static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC); -static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type); +static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC); +static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type, int options); static int json_determine_array_type(zval 
**val TSRMLS_DC) /* {{{ */ { @@ -115,7 +131,7 @@ static int json_determine_array_type(zva } /* }}} */ -static void json_encode_array(smart_str *buf, zval **val TSRMLS_DC) /* {{{ */ +static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC) /* {{{ */ { int i, r; HashTable *myht; @@ -172,7 +188,7 @@ static void json_encode_array(smart_str need_comma = 1; } - json_encode_r(buf, *data TSRMLS_CC); + json_encode_r(buf, *data, options TSRMLS_CC); } else if (r == 1) { if (i == HASH_KEY_IS_STRING || i == HASH_KEY_IS_UNICODE) { @@ -187,10 +203,10 @@ static void json_encode_array(smart_str need_comma = 1; } - json_escape_string(buf, key, key_len - 1, (i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING); + json_escape_string(buf, key, key_len - 1, (i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING, options); smart_str_appendc(buf, ':'); - json_encode_r(buf, *data TSRMLS_CC); + json_encode_r(buf, *data, options TSRMLS_CC); } else { if (need_comma) { smart_str_appendc(buf, ','); @@ -203,7 +219,7 @@ static void json_encode_array(smart_str smart_str_appendc(buf, '"'); smart_str_appendc(buf, ':'); - json_encode_r(buf, *data TSRMLS_CC); + json_encode_r(buf, *data, options TSRMLS_CC); } } @@ -227,7 +243,7 @@ static void json_encode_array(smart_str #define REVERSE16(us) (((us & 0xf) << 12) | (((us >> 4) & 0xf) << 8) | (((us >> 8) & 0xf) << 4) | ((us >> 12) & 0xf)) -static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type) /* {{{ */ +static void json_escape_string(smart_str *buf, zstr s, int len, zend_uchar type, int options) /* {{{ */ { int pos = 0; unsigned short us; @@ -305,6 +321,42 @@ static void json_escape_string(smart_str smart_str_appendl(buf, "\\t", 2); } break; + case '<': + { + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003C", 6); + } else { + smary_str_appendc(buf, '<'); + } + } + break; + case '>': + { + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003E", 6); + } else { + smary_str_appendc(buf, '>'); 
+ } + } + break; + case '&': + { + if (options & PHP_JSON_HEX_AMP) { + smart_str_appendl(buf, "\\u0026", 6); + } else { + smary_str_appendc(buf, '&'); + } + } + break; + case '\'': + { + if (options & PHP_JSON_HEX_APOS) { + smart_str_appendl(buf, "\\u0027", 6); + } else { + smary_str_appendc(buf, '\''); + } + } + break; default: { if (us >= ' ' && (us & 127) == us) @@ -337,7 +389,7 @@ static void json_escape_string(smart_str } /* }}} */ -static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC) /* {{{ */ +static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC) /* {{{ */ { switch (Z_TYPE_P(val)) { case IS_NULL: @@ -374,11 +426,11 @@ static void json_encode_r(smart_str *buf break; case IS_STRING: case IS_UNICODE: - json_escape_string(buf, Z_UNIVAL_P(val), Z_UNILEN_P(val), Z_TYPE_P(val)); + json_escape_string(buf, Z_UNIVAL_P(val), Z_UNILEN_P(val), Z_TYPE_P(val), options); break; case IS_ARRAY: case IS_OBJECT: - json_encode_array(buf, &val TSRMLS_CC); + json_encode_array(buf, &val, options TSRMLS_CC); break; default: zend_error(E_WARNING, "[json] (json_encode_r) type is unsupported, encoded as null."); @@ -396,12 +448,13 @@ PHP_FUNCTION(json_encode) { zval *parameter; smart_str buf = {0}; + long options = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &parameter) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|l", &parameter, &options) == FAILURE) { return; } - json_encode_r(&buf, parameter TSRMLS_CC); + json_encode_r(&buf, parameter, options TSRMLS_CC); /* * Return as binary string, since the result is 99% likely to be just