Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:26756 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 18556 invoked by uid 1010); 1 Dec 2006 21:44:10 -0000 Delivered-To: ezmlm-scan-internals@lists.php.net Delivered-To: ezmlm-internals@lists.php.net Received: (qmail 18541 invoked from network); 1 Dec 2006 21:44:10 -0000 Received: from unknown (HELO lists.php.net) (127.0.0.1) by localhost with SMTP; 1 Dec 2006 21:44:10 -0000 Authentication-Results: pb1.pair.com header.from=andrei@gravitonic.com; sender-id=unknown Authentication-Results: pb1.pair.com smtp.mail=andrei@gravitonic.com; spf=permerror; sender-id=unknown Received-SPF: error (pb1.pair.com: domain gravitonic.com from 204.11.219.139 cause and error) X-PHP-List-Original-Sender: andrei@gravitonic.com X-Host-Fingerprint: 204.11.219.139 lerdorf.com Linux 2.5 (sometimes 2.4) (4) Received: from [204.11.219.139] ([204.11.219.139:45740] helo=lerdorf.com) by pb1.pair.com (ecelerity 2.1.1.9-wez r(12769M)) with ESMTP id 21/6C-40958-602A0754 for ; Fri, 01 Dec 2006 16:44:09 -0500 Received: from [66.228.175.145] (borndress-lm.corp.yahoo.com [66.228.175.145]) (authenticated bits=0) by lerdorf.com (8.13.8/8.13.8/Debian-2) with ESMTP id kB1LhVn2020571 for ; Fri, 1 Dec 2006 13:43:32 -0800 Mime-Version: 1.0 (Apple Message framework v624) To: php-dev Internals Message-ID: <0c60b01d19ec7e242ade31e29120ee27@gravitonic.com> Content-Type: multipart/mixed; boundary=Apple-Mail-2--128864400 Date: Fri, 1 Dec 2006 13:43:32 -0800 X-Mailer: Apple Mail (2.624) Subject: PHP 5.2: Serialization patch From: andrei@gravitonic.com (Andrei Zmievski) --Apple-Mail-2--128864400 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=US-ASCII; format=flowed When serializing binary strings in PHP 6, we have to escape non-ASCII characters and then unescape them on unserialization. 
This patch adds unescaping support to PHP 5.2, in order to make it easier to exchange data between PHP 5 and 6. If no one has objections, I will commit soon. -Andrei --Apple-Mail-2--128864400 Content-Transfer-Encoding: 7bit Content-Type: text/plain; x-unix-mode=0644; name="php52_serialization.diff.txt" Content-Disposition: attachment; filename=php52_serialization.diff.txt Index: ext/standard/var_unserializer.c =================================================================== RCS file: /repository/php-src/ext/standard/var_unserializer.c,v retrieving revision 1.70.2.4 diff -u -r1.70.2.4 var_unserializer.c --- ext/standard/var_unserializer.c 1 Jan 2006 12:50:16 -0000 1.70.2.4 +++ ext/standard/var_unserializer.c 1 Dec 2006 21:39:39 -0000 @@ -1,4 +1,4 @@ -/* Generated by re2c 0.9.11 on Sun Jan 1 14:39:32 2006 */ +/* Generated by re2c 0.9.12 on Fri Dec 1 13:39:29 2006 */ #line 1 "ext/standard/var_unserializer.re" /* +----------------------------------------------------------------------+ @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: var_unserializer.c,v 1.70.2.4 2006/01/01 12:50:16 sniper Exp $ */ +/* $Id: var_unserializer.re,v 1.52.2.2 2006/01/01 12:26:08 sniper Exp $ */ #include "php.h" #include "ext/standard/php_var.h" @@ -140,6 +140,38 @@ /* }}} */ +static char *unserialize_str(const unsigned char **p, int len) +{ + int i, j; + char *str = emalloc(len+1); + + for (i = 0; i < len; i++) { + if (**p != '\\') { + str[i] = (char)**p; + } else { + unsigned char ch = 0; + + for (j = 0; j < 2; j++) { + (*p)++; + if (**p >= '0' && **p <= '9') { + ch = (ch << 4) + (**p -'0'); + } else if (**p >= 'a' && **p <= 'f') { + ch = (ch << 4) + (**p -'a'+10); + } else if (**p >= 'A' && **p <= 'F') { + ch = (ch << 4) + (**p -'A'+10); + } else { + efree(str); + return NULL; + } + } + str[i] = (char)ch; + } + (*p)++; + } + str[i] = 0; + return str; +} + #define YYFILL(n) do { } while (0) #define YYCTYPE unsigned char #define 
YYCURSOR cursor @@ -147,7 +179,7 @@ #define YYMARKER marker -#line 155 "ext/standard/var_unserializer.re" +#line 187 "ext/standard/var_unserializer.re" @@ -390,7 +422,7 @@ 0, 0, 0, 0, 0, 0, 0, 0, }; -#line 394 "ext/standard/var_unserializer.c" +#line 426 "ext/standard/var_unserializer.c" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -418,9 +450,9 @@ if(yych == ':') goto yy87; goto yy3; yy3: -#line 626 "ext/standard/var_unserializer.re" +#line 659 "ext/standard/var_unserializer.re" { return 0; } -#line 424 "ext/standard/var_unserializer.c" +#line 456 "ext/standard/var_unserializer.c" yy4: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if(yych == ':') goto yy81; @@ -459,13 +491,13 @@ yy13: ++YYCURSOR; goto yy14; yy14: -#line 620 "ext/standard/var_unserializer.re" +#line 653 "ext/standard/var_unserializer.re" { /* this is the case where we have less data than planned */ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unexpected end of serialized data"); return 0; /* not sure if it should be 0 or 1 here? 
*/ } -#line 469 "ext/standard/var_unserializer.c" +#line 501 "ext/standard/var_unserializer.c" yy15: yych = *++YYCURSOR; goto yy3; yy16: yych = *++YYCURSOR; @@ -498,7 +530,7 @@ yy22: ++YYCURSOR; goto yy23; yy23: -#line 508 "ext/standard/var_unserializer.re" +#line 541 "ext/standard/var_unserializer.re" { size_t len, len2, len3, maxlen; long elements; @@ -610,7 +642,7 @@ return object_common2(UNSERIALIZE_PASSTHRU, elements); } -#line 614 "ext/standard/var_unserializer.c" +#line 646 "ext/standard/var_unserializer.c" yy24: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy17; @@ -639,7 +671,7 @@ yy29: ++YYCURSOR; goto yy30; yy30: -#line 500 "ext/standard/var_unserializer.re" +#line 533 "ext/standard/var_unserializer.re" { INIT_PZVAL(*rval); @@ -647,7 +679,7 @@ return object_common2(UNSERIALIZE_PASSTHRU, object_common1(UNSERIALIZE_PASSTHRU, ZEND_STANDARD_CLASS_DEF_PTR)); } -#line 651 "ext/standard/var_unserializer.c" +#line 683 "ext/standard/var_unserializer.c" yy31: yych = *++YYCURSOR; if(yych == '+') goto yy32; if(yych <= '/') goto yy17; @@ -671,7 +703,7 @@ yy36: ++YYCURSOR; goto yy37; yy37: -#line 478 "ext/standard/var_unserializer.re" +#line 511 "ext/standard/var_unserializer.re" { long elements = parse_iv(start + 2); /* use iv() not uiv() in order to check data range */ @@ -693,7 +725,7 @@ return finish_nested_data(UNSERIALIZE_PASSTHRU); } -#line 697 "ext/standard/var_unserializer.c" +#line 729 "ext/standard/var_unserializer.c" yy38: yych = *++YYCURSOR; if(yych == '+') goto yy39; if(yych <= '/') goto yy17; @@ -717,7 +749,7 @@ yy43: ++YYCURSOR; goto yy44; yy44: -#line 450 "ext/standard/var_unserializer.re" +#line 482 "ext/standard/var_unserializer.re" { size_t len, maxlen; char *str; @@ -729,11 +761,12 @@ return 0; } - str = (char*)YYCURSOR; - - YYCURSOR += len; + if ((str = unserialize_str(&YYCURSOR, len)) == NULL) { + return 0; + } if (*(YYCURSOR) != '"') { + efree(str); *p = YYCURSOR; return 0; } @@ -742,10 +775,10 @@ *p = YYCURSOR; INIT_PZVAL(*rval); 
- ZVAL_STRINGL(*rval, str, len, 1); + ZVAL_STRINGL(*rval, str, len, 0); return 1; } -#line 749 "ext/standard/var_unserializer.c" +#line 782 "ext/standard/var_unserializer.c" yy45: yych = *++YYCURSOR; if(yych <= '/'){ if(yych <= ','){ @@ -834,14 +867,14 @@ yy55: ++YYCURSOR; goto yy56; yy56: -#line 443 "ext/standard/var_unserializer.re" +#line 475 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_DOUBLE(*rval, zend_strtod((const char *)start + 2, NULL)); return 1; } -#line 845 "ext/standard/var_unserializer.c" +#line 878 "ext/standard/var_unserializer.c" yy57: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy17; @@ -901,7 +934,7 @@ yy66: ++YYCURSOR; goto yy67; yy67: -#line 428 "ext/standard/var_unserializer.re" +#line 460 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); @@ -916,7 +949,7 @@ return 1; } -#line 920 "ext/standard/var_unserializer.c" +#line 953 "ext/standard/var_unserializer.c" yy68: yych = *++YYCURSOR; if(yych == 'N') goto yy65; goto yy17; @@ -945,14 +978,14 @@ yy73: ++YYCURSOR; goto yy74; yy74: -#line 421 "ext/standard/var_unserializer.re" +#line 453 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_LONG(*rval, parse_iv(start + 2)); return 1; } -#line 956 "ext/standard/var_unserializer.c" +#line 989 "ext/standard/var_unserializer.c" yy75: yych = *++YYCURSOR; if(yych <= '/') goto yy17; if(yych >= '2') goto yy17; @@ -963,25 +996,25 @@ yy77: ++YYCURSOR; goto yy78; yy78: -#line 414 "ext/standard/var_unserializer.re" +#line 446 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_BOOL(*rval, parse_iv(start + 2)); return 1; } -#line 974 "ext/standard/var_unserializer.c" +#line 1007 "ext/standard/var_unserializer.c" yy79: ++YYCURSOR; goto yy80; yy80: -#line 407 "ext/standard/var_unserializer.re" +#line 439 "ext/standard/var_unserializer.re" { *p = YYCURSOR; INIT_PZVAL(*rval); ZVAL_NULL(*rval); return 1; } -#line 985 "ext/standard/var_unserializer.c" 
+#line 1018 "ext/standard/var_unserializer.c" yy81: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy17; @@ -1007,7 +1040,7 @@ yy85: ++YYCURSOR; goto yy86; yy86: -#line 384 "ext/standard/var_unserializer.re" +#line 416 "ext/standard/var_unserializer.re" { long id; @@ -1030,7 +1063,7 @@ return 1; } -#line 1034 "ext/standard/var_unserializer.c" +#line 1067 "ext/standard/var_unserializer.c" yy87: yych = *++YYCURSOR; if(yych <= ','){ if(yych != '+') goto yy17; @@ -1056,7 +1089,7 @@ yy91: ++YYCURSOR; goto yy92; yy92: -#line 363 "ext/standard/var_unserializer.re" +#line 395 "ext/standard/var_unserializer.re" { long id; @@ -1077,10 +1110,10 @@ return 1; } -#line 1081 "ext/standard/var_unserializer.c" +#line 1114 "ext/standard/var_unserializer.c" } } -#line 628 "ext/standard/var_unserializer.re" +#line 661 "ext/standard/var_unserializer.re" return 0; Index: ext/standard/var_unserializer.re =================================================================== RCS file: /repository/php-src/ext/standard/var_unserializer.re,v retrieving revision 1.52.2.2 diff -u -r1.52.2.2 var_unserializer.re --- ext/standard/var_unserializer.re 1 Jan 2006 12:26:08 -0000 1.52.2.2 +++ ext/standard/var_unserializer.re 1 Dec 2006 21:39:39 -0000 @@ -138,6 +138,38 @@ /* }}} */ +static char *unserialize_str(const unsigned char **p, int len) +{ + int i, j; + char *str = emalloc(len+1); + + for (i = 0; i < len; i++) { + if (**p != '\\') { + str[i] = (char)**p; + } else { + unsigned char ch = 0; + + for (j = 0; j < 2; j++) { + (*p)++; + if (**p >= '0' && **p <= '9') { + ch = (ch << 4) + (**p -'0'); + } else if (**p >= 'a' && **p <= 'f') { + ch = (ch << 4) + (**p -'a'+10); + } else if (**p >= 'A' && **p <= 'F') { + ch = (ch << 4) + (**p -'A'+10); + } else { + efree(str); + return NULL; + } + } + str[i] = (char)ch; + } + (*p)++; + } + str[i] = 0; + return str; +} + #define YYFILL(n) do { } while (0) #define YYCTYPE unsigned char #define YYCURSOR cursor @@ -458,11 +490,12 @@ return 0; } - str = 
(char*)YYCURSOR; - - YYCURSOR += len; + if ((str = unserialize_str(&YYCURSOR, len)) == NULL) { + return 0; + } if (*(YYCURSOR) != '"') { + efree(str); *p = YYCURSOR; return 0; } @@ -471,7 +504,7 @@ *p = YYCURSOR; INIT_PZVAL(*rval); - ZVAL_STRINGL(*rval, str, len, 1); + ZVAL_STRINGL(*rval, str, len, 0); return 1; } --Apple-Mail-2--128864400--