Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:10528 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 56349 invoked by uid 1010); 16 Jun 2004 16:37:51 -0000 Delivered-To: ezmlm-scan-internals@lists.php.net Delivered-To: ezmlm-internals@lists.php.net Received: (qmail 56304 invoked by uid 1007); 16 Jun 2004 16:37:51 -0000 To: internals@lists.php.net Date: Wed, 16 Jun 2004 19:36:13 +0300 Organization: none Content-Type: text/plain; format=flowed; delsp=yes; charset=koi8-r MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID: User-Agent: Opera M2/7.50 (Win32, build 3778) X-Posted-By: 217.23.116.150 Subject: strrchr() bugs From: valyala@tut.by ("Alexander Valyalkin") Today I've revised strrchr() function in PHP4.3.7 sources. It wasn't binary-safe. For example: 1) strrpos('abcd', '') returns 4. Expected FALSE 2) strrpos("a\0abcde", 'a') returns 6. Expected 2 3) strrpos("a\0bcde", "\0") returns 6. Expected 1 4) strrpos('', '\0') returns 0. Expected FALSE My version of strrchr() is binary-safe and faster. Below I provide standalone test with old and new strrpos() functions. Anyone can compile it and compare results. If you are very lazy or dont know how compile source file, below you could see my results of this test: ***************************** Test number 1 old strrpos time: 4132 Char chr(0) is found in the string [] (len=0) at pos=0 new strrpos time: 134 Char chr(0) is not found in the string [] (len=0) ***************************** Test number 2 old strrpos time: 176 Char chr(97) is not found in the string [] (len=0) new strrpos time: 163 Char chr(97) is not found in the string [] (len=0) ***************************** Test number 3 old strrpos time: 260 Char chr(0) is found in the string [test] (len=4) at pos=4 new strrpos time: 195 Char chr(0) is not found in the string [test] (len=4) ***************************** Test number 4 old strrpos time: 314 Char chr(0) is found in the string [te] (len=5) at pos=5 new strrpos time: 222 Char chr(0) is found in the string [te] (len=5) at pos=2 ***************************** Test number 5 old strrpos time: 193 Char chr(97) is found in the string [aa] (len=7) at pos=6 new strrpos time: 134 Char chr(97) is found in the string [aa] (len=7) at pos=4 ***************************** Test number 6 old strrpos time: 972 Char chr(99) is found in the string [aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb] (len=50) at pos=35 new strrpos time: 337 Char chr(99) is found in the string [aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb] (len=50) at pos=35 sources of my test =================cut================ #include #include #include /* for _rdtsc() */ /* some faked PHP API definitions */ #define PHP_FUNCTION(a) size_t a(zval *s1, zval *s2, err_type *errno) #define ZEND_NUM_ARGS() 2 #define SUCCESS 1 #define FAILURE 0 #define zend_get_parameters_ex(foo, p1, p2) f1((p1), (p2), &s1, &s2) #define WRONG_PARAM_COUNT { *errno = WRONG_PARAMS; return 0; } #define convert_to_string_ex(a) #define IS_STRING 1 #define Z_TYPE_PP(a) IS_STRING #define Z_STRVAL_P(a) ((a)->s) #define Z_STRVAL_PP(a) Z_STRVAL_P(*(a)) #define Z_STRLEN_P(a) ((a)->len) #define Z_STRLEN_PP(a) Z_STRLEN_P(*(a)) #define convert_to_long_ex(a) #define Z_LVAL_PP(a) 1 #define RETURN_LONG(num) { *errno = NO_ERROR; return (num); } #define RETURN_FALSE { *errno = NOT_FOUND; return 0; } typedef enum { NO_ERROR = 0, WRONG_PARAMS, NOT_FOUND } err_type; typedef struct { char *s; size_t len; } zval; typedef struct { char is_not_empty; zval haystack; zval needle; } my_testcase; int f1(zval ***haystack, zval ***needle, zval **s1, zval **s2) { *haystack = s1; *needle = s2; return SUCCESS; } /**************** tescases **********************/ my_testcase test[] = { /* Empty haystack & empty needle. Old strrpos() fails. */ {1, {"", 0}, {"", 0}}, /* Empty haystack & not empty needle. Old strrpos() works fine. */ {1, {"", 0}, {"a", 1}}, /* Not empty haystack & empty needle. Old strrpos() fails. */ {1, {"test", 4}, {"", 0}}, /* Haystack consists \0 char. Needle is \0. Old strrpos() fails. */ {1, {"te\0st", 5}, {"\0", 1}}, /* Haystack consists \0 char. Needle is not \0. Old strrpos() fails. */ {1, {"aa\0aabb", 7}, {"a", 1}}, /* Quite long haystack for speed comparison. */ {1, {"aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb", 50}, {"c", 1}}, /* Do not modify the string below. It is an end marker. */ {0, {"", 0}, {"", 0}} }; /**************** old strrpos *******************/ PHP_FUNCTION(strrpos_old) { zval **haystack, **needle; char *found = NULL; if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) { WRONG_PARAM_COUNT; } convert_to_string_ex(haystack); if (Z_TYPE_PP(needle) == IS_STRING) { found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle)); } else { convert_to_long_ex(needle); found = strrchr(Z_STRVAL_PP(haystack), (char) Z_LVAL_PP(needle)); } if (found) { RETURN_LONG(Z_STRLEN_PP(haystack) - strlen(found)); } else { RETURN_FALSE; } } /****************** new strrpos **********************/ PHP_FUNCTION(strrpos_new) { zval **haystack, **needle; char *ptr1, *ptr2; char needle_char; if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) { WRONG_PARAM_COUNT; } /* deal with haystack */ convert_to_string_ex(haystack); ptr1 = Z_STRVAL_PP(haystack); ptr2 = ptr1 + Z_STRLEN_PP(haystack); if (ptr1 == ptr2) { /* haystack is empty */ RETURN_FALSE; } /* deal with needle */ if (Z_TYPE_PP(needle) != IS_STRING) { convert_to_long_ex(needle); needle_char = (char) Z_LVAL_PP(needle); } else if (Z_STRLEN_PP(needle) == 0) { /* needle is empty */ RETURN_FALSE; } else { needle_char = *(Z_STRVAL_PP(needle)); } /* find position of last occurence of a [needle_char] in a string [haystack] */ while (ptr1 < ptr2 && *(--ptr2) != needle_char) { /* empty cycle */ } if (*ptr2 != needle_char) { /* cant find position of [needle_char] */ RETURN_FALSE; } else { /* position has been found */ RETURN_LONG(ptr2 - ptr1); } } /****************** auxiliary funcs ********************/ void handle_result(size_t pos, err_type errno, zval *haystack, zval *needle) { switch (errno) { case NO_ERROR : printf("Char chr(%d) is found in the string [%s] (len=%d) at pos=%d\n\n", (unsigned char) *(needle->s), haystack->s, haystack->len, pos); break; case WRONG_PARAMS : printf("wrong parameters count\n\n"); break; case NOT_FOUND : printf("Char chr(%d) is not found in the string [%s] (len=%d)\n\n", (unsigned char) *(needle->s), haystack->s, haystack->len); break; default : printf("Unknown error\n\n"); break; } } /********************* main func **************************/ int main(int argc,char *argv[]) { zval *needle, *haystack; err_type errno; size_t pos; unsigned long long t; my_testcase *testcase = test; int i; i = 0; while (testcase->is_not_empty) { printf("*****************************\n"); printf("Test number %d\n\n", ++i); needle = &testcase->needle; haystack = &testcase->haystack; /* test old strrpos */ t = _rdtsc(); pos = strrpos_old(haystack, needle, &errno); printf("old strrpos time: %lld\n", _rdtsc() - t); handle_result(pos, errno, haystack, needle); /* test new strrpos */ t = _rdtsc(); pos = strrpos_new(haystack, needle, &errno); printf("new strrpos time: %lld\n", _rdtsc() - t); handle_result(pos, errno, haystack, needle); /* goto the next testcase */ testcase++; } return 0; } =================cut================ unified diff =================cut================ --- string.c Thu May 13 20:44:32 2004 +++ string_strrpos.c Wed Jun 16 19:09:13 2004 @@ -1486,24 +1486,43 @@ PHP_FUNCTION(strrpos) { zval **haystack, **needle; - char *found = NULL; + char *ptr1, *ptr2; + char needle_char; if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) { WRONG_PARAM_COUNT; } + + /* deal with haystack */ convert_to_string_ex(haystack); + ptr1 = Z_STRVAL_PP(haystack); + ptr2 = ptr1 + Z_STRLEN_PP(haystack); + if (ptr1 == ptr2) { + /* haystack is empty */ + RETURN_FALSE; + } - if (Z_TYPE_PP(needle) == IS_STRING) { - found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle)); - } else { + /* deal with needle */ + if (Z_TYPE_PP(needle) != IS_STRING) { convert_to_long_ex(needle); - found = strrchr(Z_STRVAL_PP(haystack), (char) Z_LVAL_PP(needle)); + needle_char = (char) Z_LVAL_PP(needle); + } else if (Z_STRLEN_PP(needle) == 0) { + /* needle is empty */ + RETURN_FALSE; + } else { + needle_char = *(Z_STRVAL_PP(needle)); } - if (found) { - RETURN_LONG(Z_STRLEN_PP(haystack) - strlen(found)); - } else { + /* find position of last occurence of a [needle_char] in a string [haystack] */ + while (ptr1 < ptr2 && *(--ptr2) != needle_char) { + /* empty cycle */ + } + if (*ptr2 != needle_char) { + /* cant find position of [needle_char] */ RETURN_FALSE; + } else { + /* position has been found */ + RETURN_LONG(ptr2 - ptr1); } } /* }}} */ =================cut================ -- Using Opera's revolutionary e-mail client: http://www.opera.com/m2/