Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:26827 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 86437 invoked by uid 1010); 7 Dec 2006 10:56:00 -0000 Delivered-To: ezmlm-scan-internals@lists.php.net Delivered-To: ezmlm-internals@lists.php.net Received: (qmail 86422 invoked from network); 7 Dec 2006 10:56:00 -0000 Received: from unknown (HELO lists.php.net) (127.0.0.1) by localhost with SMTP; 7 Dec 2006 10:56:00 -0000 Authentication-Results: pb1.pair.com header.from=php_lists@realplain.com; sender-id=unknown Authentication-Results: pb1.pair.com smtp.mail=php_lists@realplain.com; spf=permerror; sender-id=unknown Received-SPF: error (pb1.pair.com: domain realplain.com from 209.142.136.132 cause and error) X-PHP-List-Original-Sender: php_lists@realplain.com X-Host-Fingerprint: 209.142.136.132 msa2-mx.centurytel.net Linux 2.4/2.6 Received: from [209.142.136.132] ([209.142.136.132:56823] helo=msa2-mx.centurytel.net) by pb1.pair.com (ecelerity 2.1.1.9-wez r(12769M)) with ESMTP id 8B/F0-13900-D13F7754 for ; Thu, 07 Dec 2006 05:56:00 -0500 Received: from pc1 (d19-232.rt-bras.wnvl.centurytel.net [69.179.146.232]) by msa2-mx.centurytel.net (8.13.6/8.13.6) with SMTP id kB7AtMMU000534 for ; Thu, 7 Dec 2006 04:55:22 -0600 Message-ID: <00db01c719ee$318e7900$0201a8c0@pc1> To: Date: Thu, 7 Dec 2006 04:55:22 -0600 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----=_NextPart_000_00D8_01C719BB.E6BAD090" X-Priority: 3 X-MSMail-Priority: Normal X-Mailer: Microsoft Outlook Express 6.00.2800.1807 X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1807 Subject: [PATCH] Unicode number changes From: php_lists@realplain.com ("Matt Wilmas") ------=_NextPart_000_00D8_01C719BB.E6BAD090 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Hi Andrei, Antony, ... 
Attached is a patch for zend_u_strtol() and HANDLE_U_NUMERIC() to only allow ASCII digits and not use u_digit(). (Also switched to hex values for sign characters, etc.) I tested the changes quickly and all appears fine. Provides a pretty good performance increase too. :-) Matt ------=_NextPart_000_00D8_01C719BB.E6BAD090 Content-Type: text/plain; name="unicode_numbers.diff.txt" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="unicode_numbers.diff.txt" Index: zend_hash.c=0A= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=0A= RCS file: /repository/ZendEngine2/zend_hash.c,v=0A= retrieving revision 1.144=0A= diff -u -r1.144 zend_hash.c=0A= --- zend_hash.c 8 Nov 2006 16:02:29 -0000 1.144=0A= +++ zend_hash.c 7 Dec 2006 10:16:21 -0000=0A= @@ -1880,35 +1880,33 @@=0A= } while (0); \=0A= }=0A= =0A= -#define HANDLE_U_NUMERIC(key, length, func) { \=0A= - register UChar *tmp=3Dkey; \=0A= - register int val; \=0A= +#define HANDLE_U_NUMERIC(key, length, func) { \=0A= + register UChar *tmp=3Dkey; \=0A= \=0A= - if (*tmp=3D=3D'-') { \=0A= + if (*tmp=3D=3D0x2D /*'-'*/) { \=0A= tmp++; \=0A= } \=0A= - if ((val =3D u_digit(*tmp, 10)) >=3D 0) do { /* possibly a numeric = index */ \=0A= - UChar *end=3Dkey+length-1; \=0A= + if ((*tmp>=3D0x30 /*'0'*/ && *tmp<=3D0x39 /*'9'*/)) do { /* possibly a = numeric index */ \=0A= + UChar *end=3Dkey+length-1; \=0A= long idx; \=0A= \=0A= - if (val=3D=3D0 && length>2) { /* don't accept numbers with leading = zeros */ \=0A= + if (*tmp++=3D=3D0x30 && length>2) { /* don't accept numbers with = leading zeros */ \=0A= break; \=0A= - } \=0A= - tmp++; \=0A= + } \=0A= while (tmp<end) { \=0A= - if (u_digit(*tmp, 10) < 0) { \=0A= + if (!(*tmp>=3D0x30 /*'0'*/ && *tmp<=3D0x39 /*'9'*/)) { \=0A= break; \=0A= } \=0A= tmp++; \=0A= } \=0A= if (tmp=3D=3Dend && *tmp=3D=3D0) { /* a numeric index */ \=0A= - if (*key=3D=3D'-') { \=0A= - idx =3D 
zend_u_strtol(key, NULL, 10); \=0A= + if (*key=3D=3D0x2D /*'-'*/) { \=0A= + idx =3D zend_u_strtol(key, NULL, 10); \=0A= if (idx!=3DLONG_MIN) { \=0A= return func; \=0A= } \=0A= } else { \=0A= - idx =3D zend_u_strtol(key, NULL, 10); \=0A= + idx =3D zend_u_strtol(key, NULL, 10); \=0A= if (idx!=3DLONG_MAX) { \=0A= return func; \=0A= } \=0A= Index: zend_strtol.c=0A= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=0A= RCS file: /repository/ZendEngine2/zend_strtol.c,v=0A= retrieving revision 1.3=0A= diff -u -r1.3 zend_strtol.c=0A= --- zend_strtol.c 6 Oct 2006 17:23:05 -0000 1.3=0A= +++ zend_strtol.c 7 Dec 2006 10:16:21 -0000=0A= @@ -55,7 +55,6 @@=0A= register UChar c;=0A= register unsigned long cutoff;=0A= register int neg =3D 0, any, cutlim;=0A= - register int val;=0A= =0A= /*=0A= * Skip white space and pick up leading +/- sign if any.=0A= @@ -65,20 +64,20 @@=0A= do {=0A= c =3D *s++;=0A= } while (u_isspace(c));=0A= - if (c =3D=3D '-') {=0A= + if (c =3D=3D 0x2D /*'-'*/) {=0A= neg =3D 1;=0A= c =3D *s++;=0A= - } else if (c =3D=3D '+')=0A= + } else if (c =3D=3D 0x2B /*'+'*/)=0A= c =3D *s++;=0A= if ((base =3D=3D 0 || base =3D=3D 16) &&=0A= - (c =3D=3D '0')=0A= - && (*s =3D=3D 'x' || *s =3D=3D 'X')) {=0A= + (c =3D=3D 0x30 /*'0'*/)=0A= + && (*s =3D=3D 0x78 /*'x'*/ || *s =3D=3D 0x58 /*'X'*/)) {=0A= c =3D s[1];=0A= s +=3D 2;=0A= base =3D 16;=0A= }=0A= if (base =3D=3D 0)=0A= - base =3D (c =3D=3D '0') ? 8 : 10;=0A= + base =3D (c =3D=3D 0x30 /*'0'*/) ? 
8 : 10;=0A= =0A= /*=0A= * Compute the cutoff value between legal numbers and illegal=0A= @@ -101,14 +100,23 @@=0A= cutlim =3D cutoff % (unsigned long)base;=0A= cutoff /=3D (unsigned long)base;=0A= for (acc =3D 0, any =3D 0;; c =3D *s++) {=0A= - if ((val =3D u_digit(c, base)) < 0)=0A= + if (c >=3D 0x30 /*'0'*/ && c <=3D 0x39 /*'9'*/)=0A= + c -=3D 0x30 /*'0'*/;=0A= + else if (c >=3D 0x41 /*'A'*/ && c <=3D 0x5A /*'Z'*/)=0A= + c -=3D 0x41 /*'A'*/ - 10;=0A= + else if (c >=3D 0x61 /*'a'*/ && c <=3D 0x7A /*'z'*/)=0A= + c -=3D 0x61 /*'a'*/ - 10;=0A= + else=0A= break;=0A= - if (any < 0 || acc > cutoff || (acc =3D=3D cutoff && val > cutlim))=0A= + if (c >=3D base)=0A= + break;=0A= +=0A= + if (any < 0 || acc > cutoff || (acc =3D=3D cutoff && c > cutlim))=0A= any =3D -1;=0A= else {=0A= any =3D 1;=0A= acc *=3D base;=0A= - acc +=3D val;=0A= + acc +=3D c;=0A= }=0A= }=0A= if (any < 0) {=0A= @@ -116,7 +124,7 @@=0A= errno =3D ERANGE;=0A= } else if (neg)=0A= acc =3D -acc;=0A= - if (endptr !=3D 0)=0A= + if (endptr !=3D NULL)=0A= *endptr =3D (UChar *)(any ? s - 1 : nptr);=0A= return (acc);=0A= }=0A= ------=_NextPart_000_00D8_01C719BB.E6BAD090--