Today I've revised strrchr()
function in PHP4.3.7 sources.
It wasn't binary-safe. For example:
- strrpos('abcd', '') returns 4. Expected
FALSE
- strrpos("a\0abcde", 'a') returns 6. Expected 2
- strrpos("a\0bcde", "\0") returns 6. Expected 1
- strrpos('', '\0') returns 0. Expected
FALSE
My version of strrchr()
is binary-safe and faster.
Below I provide standalone test with old and new strrpos()
functions. Anyone can compile it and compare results.
If you are very lazy or dont know how compile source file, below
you could see my results of this test:
Test number 1
old strrpos time: 4132
Char chr(0) is found in the string [] (len=0) at pos=0
new strrpos time: 134
Char chr(0) is not found in the string [] (len=0)
Test number 2
old strrpos time: 176
Char chr(97) is not found in the string [] (len=0)
new strrpos time: 163
Char chr(97) is not found in the string [] (len=0)
Test number 3
old strrpos time: 260
Char chr(0) is found in the string [test] (len=4) at pos=4
new strrpos time: 195
Char chr(0) is not found in the string [test] (len=4)
Test number 4
old strrpos time: 314
Char chr(0) is found in the string [te] (len=5) at pos=5
new strrpos time: 222
Char chr(0) is found in the string [te] (len=5) at pos=2
Test number 5
old strrpos time: 193
Char chr(97) is found in the string [aa] (len=7) at pos=6
new strrpos time: 134
Char chr(97) is found in the string [aa] (len=7) at pos=4
Test number 6
old strrpos time: 972
Char chr(99) is found in the string
[aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb] (len=50) at pos=35
new strrpos time: 337
Char chr(99) is found in the string
[aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb] (len=50) at pos=35
sources of my test
=================cut================
#include <stdio.h>
#include <string.h>
#include <time.h> /* for _rdtsc() */
/* some faked PHP API definitions */
#define PHP_FUNCTION(a) size_t a(zval *s1, zval *s2, err_type errno)
#define ZEND_NUM_ARGS() 2
#define SUCCESS 1
#define FAILURE 0
#define zend_get_parameters_ex(foo, p1, p2) f1((p1), (p2), &s1, &s2)
#define WRONG_PARAM_COUNT { errno = WRONG_PARAMS; return 0; }
#define convert_to_string_ex(a)
#define IS_STRING 1
#define Z_TYPE_PP(a) IS_STRING
#define Z_STRVAL_P(a) ((a)->s)
#define Z_STRVAL_PP(a) Z_STRVAL_P((a))
#define Z_STRLEN_P(a) ((a)->len)
#define Z_STRLEN_PP(a) Z_STRLEN_P((a))
#define convert_to_long_ex(a)
#define Z_LVAL_PP(a) 1
#define RETURN_LONG(num) { *errno = NO_ERROR; return (num); }
#define RETURN_FALSE { *errno = NOT_FOUND; return 0; }
typedef enum { NO_ERROR = 0, WRONG_PARAMS, NOT_FOUND } err_type;
typedef struct {
char *s;
size_t len;
} zval;
typedef struct {
char is_not_empty;
zval haystack;
zval needle;
} my_testcase;
int f1(zval ***haystack, zval ***needle, zval **s1, zval **s2) {
*haystack = s1;
*needle = s2;
return SUCCESS;
}
/**************** tescases **********************/
my_testcase test[] = {
/* Empty haystack & empty needle.
Old strrpos()
fails.
/
{1, {"", 0}, {"", 0}},
/ Empty haystack & not empty needle.
Old strrpos()
works fine.
/
{1, {"", 0}, {"a", 1}},
/ Not empty haystack & empty needle.
Old strrpos()
fails.
/
{1, {"test", 4}, {"", 0}},
/ Haystack consists \0 char. Needle is \0.
Old strrpos()
fails.
/
{1, {"te\0st", 5}, {"\0", 1}},
/ Haystack consists \0 char. Needle is not \0.
Old strrpos()
fails.
/
{1, {"aa\0aabb", 7}, {"a", 1}},
/ Quite long haystack for speed comparison.
/
{1, {"aaaaaaaaaaaaaabbbbbbbbbbbbbbbbbcccccaaaaaaaabbbbbb", 50}, {"c", 1}},
/ Do not modify the string below. It is an end marker.
*/
{0, {"", 0}, {"", 0}}
};
/**************** old strrpos *******************/
PHP_FUNCTION(strrpos_old)
{
zval **haystack, **needle;
char *found = NULL;
if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack,
&needle) == FAILURE) {
WRONG_PARAM_COUNT;
}
convert_to_string_ex(haystack);
if (Z_TYPE_PP(needle) == IS_STRING) {
found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle));
} else {
convert_to_long_ex(needle);
found = strrchr(Z_STRVAL_PP(haystack), (char) Z_LVAL_PP(needle));
}
if (found) {
RETURN_LONG(Z_STRLEN_PP(haystack) - strlen(found));
} else {
RETURN_FALSE;
}
}
/****************** new strrpos **********************/
PHP_FUNCTION(strrpos_new)
{
zval **haystack, **needle;
char *ptr1, *ptr2;
char needle_char;
if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack,
&needle) == FAILURE) {
WRONG_PARAM_COUNT;
}
/* deal with haystack */
convert_to_string_ex(haystack);
ptr1 = Z_STRVAL_PP(haystack);
ptr2 = ptr1 + Z_STRLEN_PP(haystack);
if (ptr1 == ptr2) {
/* haystack is empty */
RETURN_FALSE;
}
/* deal with needle */
if (Z_TYPE_PP(needle) != IS_STRING) {
convert_to_long_ex(needle);
needle_char = (char) Z_LVAL_PP(needle);
} else if (Z_STRLEN_PP(needle) == 0) {
/* needle is empty */
RETURN_FALSE;
} else {
needle_char = *(Z_STRVAL_PP(needle));
}
/* find position of last occurence of a [needle_char] in a string
[haystack] */
while (ptr1 < ptr2 && (--ptr2) != needle_char) {
/ empty cycle */
}
if (ptr2 != needle_char) {
/ cant find position of [needle_char] /
RETURN_FALSE;
} else {
/ position has been found */
RETURN_LONG(ptr2 - ptr1);
}
}
/****************** auxiliary funcs ********************/
void handle_result(size_t pos, err_type errno, zval *haystack, zval
*needle)
{
switch (errno) {
case NO_ERROR :
printf("Char chr(%d) is found in the string [%s] (len=%d) at pos=%d\n\n",
(unsigned char) *(needle->s), haystack->s, haystack->len, pos);
break;
case WRONG_PARAMS :
printf("wrong parameters count\n\n");
break;
case NOT_FOUND :
printf("Char chr(%d) is not found in the string [%s] (len=%d)\n\n",
(unsigned char) *(needle->s), haystack->s, haystack->len);
break;
default :
printf("Unknown error\n\n");
break;
}
}
/********************* main func **************************/
int main(int argc,char *argv[])
{
zval *needle, *haystack;
err_type errno;
size_t pos;
unsigned long long t;
my_testcase *testcase = test;
int i;
i = 0;
while (testcase->is_not_empty)
{
printf("*****************************\n");
printf("Test number %d\n\n", ++i);
needle = &testcase->needle;
haystack = &testcase->haystack;
/* test old strrpos */
t = _rdtsc();
pos = strrpos_old(haystack, needle, &errno);
printf("old strrpos time: %lld\n", _rdtsc() - t);
handle_result(pos, errno, haystack, needle);
/* test new strrpos */
t = _rdtsc();
pos = strrpos_new(haystack, needle, &errno);
printf("new strrpos time: %lld\n", _rdtsc() - t);
handle_result(pos, errno, haystack, needle);
/* goto the next testcase */
testcase++;
}
return 0;
}
=================cut================
unified diff
=================cut================
--- string.c Thu May 13 20:44:32 2004
+++ string_strrpos.c Wed Jun 16 19:09:13 2004
@@ -1486,24 +1486,43 @@
PHP_FUNCTION(strrpos)
{
zval **haystack, **needle;
- char *found = NULL;
-
char *ptr1, *ptr2;
-
char needle_char;
if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack,
&needle) == FAILURE) {
WRONG_PARAM_COUNT;
} -
/* deal with haystack */
convert_to_string_ex(haystack); -
ptr1 = Z_STRVAL_PP(haystack);
-
ptr2 = ptr1 + Z_STRLEN_PP(haystack);
-
if (ptr1 == ptr2) {
-
/* haystack is empty */
-
RETURN_FALSE;
-
}
- if (Z_TYPE_PP(needle) == IS_STRING) {
-
found = strrchr(Z_STRVAL_PP(haystack), *Z_STRVAL_PP(needle));
- } else {
- /* deal with needle */
- if (Z_TYPE_PP(needle) != IS_STRING) {
convert_to_long_ex(needle);
-
found = strrchr(Z_STRVAL_PP(haystack), (char) Z_LVAL_PP(needle));
-
needle_char = (char) Z_LVAL_PP(needle);
- } else if (Z_STRLEN_PP(needle) == 0) {
-
/* needle is empty */
-
RETURN_FALSE;
- } else {
-
}needle_char = *(Z_STRVAL_PP(needle));
- if (found) {
-
RETURN_LONG(Z_STRLEN_PP(haystack) - strlen(found));
- } else {
- /* find position of last occurence of a [needle_char] in a string
[haystack] */ - while (ptr1 < ptr2 && *(--ptr2) != needle_char) {
-
/* empty cycle */
- }
- if (*ptr2 != needle_char) {
-
/* cant find position of [needle_char] */ RETURN_FALSE;
- } else {
-
/* position has been found */
-
}RETURN_LONG(ptr2 - ptr1);
}
/* }}} */
=================cut================
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Alexander Valyalkin wrote:
Today I've revised
strrchr()
function in PHP4.3.7 sources.
It wasn't binary-safe. For example:
- strrpos('abcd', '') returns 4. Expected
FALSE
Although one could argue whether searching for empty
strings makes sense at all the result is not completely
wrong. There is an empty string at the end of the haystack ...
So for BC reasons this behavior should not be changed.
- strrpos("a\0abcde", 'a') returns 6. Expected 2
- strrpos("a\0bcde", "\0") returns 6. Expected 1
- strrpos('', '\0') returns 0. Expected
FALSE
these actualy look like bugs ...
My version of
strrchr()
is binary-safe and faster.
Below I provide standalone test with old and newstrrpos()
functions. Anyone can compile it and compare results.
Please, NO MORE STANDALONE TESTS. Integrate your new implementation,
add a regression test (*.phpt) for it and file a bug on bugs.php.net.
Add a unified diff patch against a PHP release version or against
current CVS.
Wait! First write a regression test, then try to fix the code
so that the regression fails with the old implementation and passes
with the new. When not sure about the intended behavior it might be
better to just report a bug and maybe add a regression test.
If the current behavior is there for a reason (like BC issues)
you will find out before implementing unneeded stuff.
Chances are way higher that someone listens to you ...
If you are very lazy or dont know how compile source file, below
you could see my results of this test:
All (good) programmers are lazy. And all of the subscribers of
this list know how to compile source files. Some even have social
skills. You should start working on yours.
Thank you in advance for your cooperation ...
--
Hartmut Holzgraefe <hartmut@php.net
- strrpos('abcd', '') returns 4. Expected
FALSE
Although one could argue whether searching for empty
strings makes sense at all the result is not completely
wrong. There is an empty string at the end of the haystack ...
And at the beginning of the haystack and also in between each and
every letter. :)
Logically, it would probably be best to return 0 because that's where
the first empty string occurs.
So for BC reasons this behavior should not be changed.
Agreed. We shouldn't mess with this function.
-adam
--
adam@trachtenberg.com
author of o'reilly's php cookbook
avoid the holiday rush, buy your copy today!
Adam Maccabee Trachtenberg wrote:
Logically, it would probably be best to return 0 because that's where
the first empty string occurs.
this is strrchr()
, not strchr()
;)
--
Hartmut Holzgraefe <hartmut@php.net
Hartmut Holzgraefe wrote:
Adam Maccabee Trachtenberg wrote:
Logically, it would probably be best to return 0 because that's where
the first empty string occurs.this is
strrchr()
, notstrchr()
;)
Actually it's strrpos()
, not str[r]chr() [look at Alexander's code],
which has been rewritten in PHP 5.
--
Ard
Ard Biesheuvel wrote:
Hartmut Holzgraefe wrote:
Adam Maccabee Trachtenberg wrote:
Logically, it would probably be best to return 0 because that's where
the first empty string occurs.this is
strrchr()
, notstrchr()
;)Actually it's
strrpos()
, not str[r]chr() [look at Alexander's code],
which has been rewritten in PHP 5.
It should still find the last emtpy string, not the first,
shouldn't it? ;)
--
Hartmut Holzgraefe <hartmut@php.net
It should still find the last emtpy string, not the first,
shouldn't it? ;)
Yup. I think we've now spent more time discussing this function than
people have spent using it. :)
-adam
--
adam@trachtenberg.com
author of o'reilly's php cookbook
avoid the holiday rush, buy your copy today!
- strrpos('abcd', '') returns 4. Expected
FALSE
Although one could argue whether searching for empty
strings makes sense at all the result is not completely
wrong. There is an empty string at the end of the haystack ...And at the beginning of the haystack and also in between each and
every letter. :)Logically, it would probably be best to return 0 because that's where
the first empty string occurs.
Erm, no. It's strRpos (reverse), so it should find the one at the end
first.
Derick
Alexander Valyalkin wrote:
Today I've revised
strrchr()
function in PHP4.3.7 sources.
It wasn't binary-safe. For example:
... which is probably why it has already been completely rewritten in PHP 5.
--
Ard