My versions of strspn()
& strcspn()
use faster and clear algorithm.
Below I provide simple test application, which compares speed and
results of old strspn()
with new one & unified diff in the bottom.
===============cut==================
#include <stdio.h>
#include <string.h>
#include <time.h> /* for _rdtsc() */
#define S1_LEN 1024 /* length of test string /
#define S2_LEN 256 / length of mask. Maximum is 265 */
#define PHPAPI
PHPAPI size_t php_strspn_new(char *s1, char *s2, char *s1_end, char
*s2_end)
{
unsigned char *p1, *p2;
char char_table[256];
if (s1 == s1_end || s2 == s2_end) {
/* there is no chars in string [s1] or in the mask [s2] */
return 0;
}
/* create a char table from mask [s2] */
memset(char_table, 0, 256);
p1 = (unsigned char *) s2;
p2 = (unsigned char *) s2_end;
while (p1 < p2) {
char_table[*p1++] = 1;
}
/* compute the length of the initial segment of string [s1] which
consists entirely of characters in mask [s2]
*/
p1 = (unsigned char *) s1;
p2 = (unsigned char *) s1_end;
while (p1 < p2 && char_table[*p1++]) {
/* empty cycle */
}
if (char_table[*(--p1)]) {
p1++;
}
return ((char *)p1 - s1);
}
PHPAPI size_t php_strspn_old(char *s1, char *s2, char *s1_end, char
*s2_end)
{
register const char *p = s1, *spanp;
register char c = *p;
cont:
for (spanp = s2; p != s1_end && spanp != s2_end;)
if (*spanp++ == c) {
c = *(++p);
goto cont;
}
return (p - s1);
}
int main(int argc,char *argv[])
{
char s1[S1_LEN], s2[S2_LEN], *s1_end = s1 + S1_LEN, *s2_end = s2 + S2_LEN;
size_t n, i;
unsigned long long t;
memset(s1, S2_LEN - 1, S1_LEN); /* create test string */
for (i = 0; i < S2_LEN; i++) s2[i] = i; /* create test mask */
t = _rdtsc();
n = php_strspn_old(s1, s2, s1_end, s2_end);
printf("Old strspn time=%lld, result=%d\n", _rdtsc() - t, n);
t = _rdtsc();
n = php_strspn_new(s1, s2, s1_end, s2_end);
printf("New strspn time=%lld, result=%d\n", _rdtsc() - t, n);
return 0;
}
===============cut==================
unified diff
==========cut==========
--- string.c Thu May 13 20:44:32 2004
+++ string_strspn.c Tue Jun 15 15:22:26 2004
@@ -1296,16 +1296,36 @@
*/
PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end)
{
- register const char *p = s1, *spanp;
- register char c = *p;
- unsigned char *p1, *p2;
- char char_table[256];
-cont:
- for (spanp = s2; p != s1_end && spanp != s2_end;)
-
if (*spanp++ == c) {
-
c = *(++p);
-
goto cont;
- /* handling some trivial cases */
- if (s1 == s1_end || s2 == s2_end) {
-
/* there is no chars in string [s1] or in the mask [s2] */
-
return 0;
- }
- /* create a char table from mask [s2] */
- memset(char_table, 0, 256);
- p1 = (unsigned char *) s2;
- p2 = (unsigned char *) s2_end;
- while (p1 < p2) {
-
char_table[*p1++] = 1;
- }
- /* compute the length of the initial segment of string [s1] which
-
consists entirely of characters in mask [s2]
- */
- p1 = (unsigned char *) s1;
- p2 = (unsigned char *) s1_end;
- while (p1 < p2 && char_table[*p1++]) {
-
/* empty cycle */ }
- return (p - s1);
- if (char_table[*(--p1)]) {
-
p1++;
- }
- return ((char )p1 - s1);
}
/ }}} */
@@ -1313,18 +1333,40 @@
*/
PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end)
{
- register const char *p, *spanp;
- register char c = *s1;
- unsigned char *p1, *p2;
- char char_table[256];
- for (p = s1;;) {
-
spanp = s2;
-
do {
-
if (*spanp == c || p == s1_end)
-
return p - s1;
-
} while (spanp++ < s2_end);
-
c = *++p;
- /* handling some trivial cases */
- if (s2 == s2_end) {
-
/* empty mask [s2] */
-
}return s1_end - s1;
- /* NOTREACHED */
- if (s1 == s1_end) {
-
/* there is no chars in string [s1] */
-
return 0;
- }
- /* create a char table from mask [s2] */
- memset(char_table, 1, 256);
- p1 = (unsigned char *) s2;
- p2 = (unsigned char *) s2_end;
- while (p1 < p2) {
-
char_table[*p1++] = 0;
- }
- /* compute the length of the initial segment of string [s1] which
-
does NOT contain any of the characters in mask [s2]
- */
- p1 = (unsigned char *) s1;
- p2 = (unsigned char *) s1_end;
- while (p1 < p2 && char_table[*p1++]) {
-
/* empty cycle */
- }
- if (char_table[*(--p1)]) {
-
p1++;
- }
- return ((char )p1 - s1);
}
/ }}} */
==========cut==========
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Privet Alex,
is it possible to show some results from benchmarking?
Thanks a lot,
Andrey
Quoting Alexander Valyalkin valyala@tut.by:
My versions of
strspn()
&strcspn()
use faster and clear algorithm.Below I provide simple test application, which compares speed and
results of oldstrspn()
with new one & unified diff in the bottom.===============cut==================
#include <stdio.h>
#include <string.h>
#include <time.h> /* for _rdtsc() */#define S1_LEN 1024 /* length of test string /
#define S2_LEN 256 / length of mask. Maximum is 265 */
#define PHPAPIPHPAPI size_t php_strspn_new(char *s1, char *s2, char *s1_end, char
*s2_end)
{
unsigned char *p1, *p2;
char char_table[256];if (s1 == s1_end || s2 == s2_end) {
/* there is no chars in string [s1] or in the mask [s2] */
return 0;
}/* create a char table from mask [s2] */
memset(char_table, 0, 256);
p1 = (unsigned char *) s2;
p2 = (unsigned char *) s2_end;
while (p1 < p2) {
char_table[*p1++] = 1;
}/* compute the length of the initial segment of string [s1] which
consists entirely of characters in mask [s2]
*/
p1 = (unsigned char *) s1;
p2 = (unsigned char *) s1_end;
while (p1 < p2 && char_table[p1++]) {
/ empty cycle /
}
if (char_table[(--p1)]) {
p1++;
}return ((char *)p1 - s1);
}PHPAPI size_t php_strspn_old(char *s1, char *s2, char *s1_end, char
*s2_end)
{
register const char *p = s1, *spanp;
register char c = *p;cont:
for (spanp = s2; p != s1_end && spanp != s2_end;)
if (*spanp++ == c) {
c = *(++p);
goto cont;
}
return (p - s1);
}int main(int argc,char *argv[])
{
char s1[S1_LEN], s2[S2_LEN], *s1_end = s1 + S1_LEN, *s2_end = s2 + S2_LEN;
size_t n, i;
unsigned long long t;memset(s1, S2_LEN - 1, S1_LEN); /* create test string /
for (i = 0; i < S2_LEN; i++) s2[i] = i; / create test mask */t = _rdtsc();
n = php_strspn_old(s1, s2, s1_end, s2_end);
printf("Old strspn time=%lld, result=%d\n", _rdtsc() - t, n);t = _rdtsc();
n = php_strspn_new(s1, s2, s1_end, s2_end);
printf("New strspn time=%lld, result=%d\n", _rdtsc() - t, n);return 0;
}
===============cut==================unified diff
==========cut==========
--- string.c Thu May 13 20:44:32 2004
+++ string_strspn.c Tue Jun 15 15:22:26 2004
@@ -1296,16 +1296,36 @@
*/
PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end)
{
- register const char *p = s1, *spanp;
- register char c = *p;
- unsigned char *p1, *p2;
- char char_table[256];
-cont:
- for (spanp = s2; p != s1_end && spanp != s2_end;)
if (*spanp++ == c) {
c = *(++p);
goto cont;
- /* handling some trivial cases */
- if (s1 == s1_end || s2 == s2_end) {
/* there is no chars in string [s1] or in the mask [s2] */
return 0;
- }
- /* create a char table from mask [s2] */
- memset(char_table, 0, 256);
- p1 = (unsigned char *) s2;
- p2 = (unsigned char *) s2_end;
- while (p1 < p2) {
char_table[*p1++] = 1;
- }
- /* compute the length of the initial segment of string [s1] which
consists entirely of characters in mask [s2]
- */
- p1 = (unsigned char *) s1;
- p2 = (unsigned char *) s1_end;
- while (p1 < p2 && char_table[*p1++]) {
/* empty cycle */ }
- return (p - s1);
- if (char_table[*(--p1)]) {
p1++;
- }
- return ((char )p1 - s1);
}
/ }}} */@@ -1313,18 +1333,40 @@
*/
PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end)
{
- register const char *p, *spanp;
- register char c = *s1;
- unsigned char *p1, *p2;
- char char_table[256];
- for (p = s1;;) {
spanp = s2;
do {
if (*spanp == c || p == s1_end)
return p - s1;
} while (spanp++ < s2_end);
c = *++p;
- /* handling some trivial cases */
- if (s2 == s2_end) {
/* empty mask [s2] */
}return s1_end - s1;
- /* NOTREACHED */
- if (s1 == s1_end) {
/* there is no chars in string [s1] */
return 0;
- }
- /* create a char table from mask [s2] */
- memset(char_table, 1, 256);
- p1 = (unsigned char *) s2;
- p2 = (unsigned char *) s2_end;
- while (p1 < p2) {
char_table[*p1++] = 0;
- }
- /* compute the length of the initial segment of string [s1] which
does NOT contain any of the characters in mask [s2]
- */
- p1 = (unsigned char *) s1;
- p2 = (unsigned char *) s1_end;
- while (p1 < p2 && char_table[*p1++]) {
/* empty cycle */
- }
- if (char_table[*(--p1)]) {
p1++;
- }
- return ((char )p1 - s1);
}
/ }}} */==========cut==========
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Privet Alex,
is it possible to show some results from benchmarking?Thanks a lot,
Andrey
Just compile my standalone test & view results :)
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
On Wed, 16 Jun 2004 11:11:25 +0300
"Alexander Valyalkin" valyala@tut.by wrote:
On Wed, 16 Jun 2004 00:41:19 +0300, Andrey Hristov php@hristov.com
wrote:Privet Alex,
is it possible to show some results from benchmarking?Thanks a lot,
AndreyJust compile my standalone test & view results :)
This "standalone test" has nothing to do with PHP perfomance and
stability.
WBR,
Antony Dovgal aka tony2001
tony2001@phpclub.net || antony@dovgal.com
Alexander Valyalkin wrote:
Privet Alex,
is it possible to show some results from benchmarking?Just compile my standalone test & view results :)
As noted before: standalone benchmarks can only tell you whether ot not
it is worth the effort to implement a change within PHP and benchmark
it as a whole. Some changes may not show a difference as the code
is not on a critical path, some may only improve performance on certain
platforms while hitting it on others, sometimes even in very unexpected
circumstances. (i could continue almost forever ...)
Besides that the good old rules of engineering applies:
"If it ain't broken -> don't fix it"
and
"Never change a running system!"
I'm sure there are lots of places within PHP that deserve code reviews
and performance improvements, but especially now, in a feature freeze,
it is important to act in accordance to the rules above. Any changes in
code come with the risk of introducing new errors. Especially those
that seem "simple" bear this risk. To keep this risk as low as possible
only bug fixes are allowed in a feature freeze. A "lack of performance"
is in this case only considered a bug if it substantialy hurts
performance.
PHP has become a very complex piece of software. Its developement
process has also become very complex. Do you really think you can
just jump in out of nowhere, throw patches at us, telling us that we
are all too stupid to write efficient code and still be taken serious
as the great savior?
--
Hartmut Holzgraefe <hartmut@php.net