Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:10435 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 70418 invoked by uid 1010); 14 Jun 2004 10:50:55 -0000 Delivered-To: ezmlm-scan-internals@lists.php.net Delivered-To: ezmlm-internals@lists.php.net Received: (qmail 70384 invoked by uid 1007); 14 Jun 2004 10:50:55 -0000 To: internals@lists.php.net Date: Mon, 14 Jun 2004 13:49:52 +0300 Organization: none Content-Type: text/plain; format=flowed; delsp=yes; charset=koi8-r MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID: User-Agent: Opera M2/7.50 (Win32, build 3778) X-Posted-By: 217.23.116.150 Subject: implode() speedup From: valyala@tut.by ("Alexander Valyalkin") The implode() function uses very bad algorithm, which uses memory reallocation very often. The speed of functions falls quickly with increase of length of imploded array & data in it. My version of implode() uses much better algorithm: 1) counts the amount of memory needed to result string 2) allocates memory 3) implodes array to the allocated memory Below is standalone test application and unified diff: the test: =========cut======== #include #include #include /* faked PHP API, which has been used in php_implode() */ #define PHPAPI #define HashPosition unsigned int #define Z_ARRVAL_P(a) (a) #define Z_STRLEN_P(a) ((a)->len) #define Z_STRVAL_P(a) ((a)->p) #define Z_STRLEN_PP(pp) Z_STRLEN_P(*(pp)) #define Z_STRVAL_PP(pp) Z_STRVAL_P(*(pp)) #define RETURN_STRINGL(str, str_len, foo) \ { \ zval *q = return_value; \ q->p = (str); \ q->len = (str_len); \ return; \ } #define RETURN_EMPTY_STRING() \ { \ zval *q = return_value; \ q->p = emalloc(1); \ *(q->p) = '\0'; \ q->len = 0; \ return; \ } #define RETURN_FALSE RETURN_EMPTY_STRING() #define zend_hash_internal_pointer_reset_ex(foo, pos) *(pos) = 0 #define SUCCESS 1 #define FAILED 0 #define SEPARATE_ZVAL(foo) #define convert_to_string(foo) #define zend_hash_move_forward_ex(foo, pos) (*pos)++ #define emalloc(len) malloc(len) #define efree(p) free(p) #define TSRMLS_FETCH() #define TSRMLS_CC #define php_error_docref(foo, bar, s) printf("Error: [%s]\n", s) typedef struct { char *p; /* pointer to a data */ size_t len; /* length of a data */ } zval; int zend_hash_num_elements(zval *a) { int i = 0; while (a[i].p != NULL) i++; return i; } int zend_hash_get_current_data_ex(zval *arr, void **z_ptr, HashPosition *pos) { static zval *z, **zp; zp = &z; z = arr + *pos; *z_ptr = zp; return z->p == NULL ? FAILED : SUCCESS; } /******************************************/ /* test array */ zval arr[] = { {"foo", 3}, {"bar", 3}, {"baz", 3}, {NULL, 0} }; /* delimiter */ zval delim = {":", 1}; /******************************************/ PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value) { zval **tmp; HashPosition pos; int numelems, i; size_t result_len, delim_len, tmp_len; char *result_str, *delim_str, *ptr; char **str_arr; size_t *len_arr; numelems = zend_hash_num_elements(Z_ARRVAL_P(arr)); if (numelems < 1) RETURN_EMPTY_STRING(); str_arr = (char **) emalloc(numelems * sizeof(char *)); len_arr = (size_t *) emalloc(numelems * sizeof(size_t)); if (str_arr == NULL || len_arr == NULL) { TSRMLS_FETCH(); php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); RETURN_FALSE; } delim_str = Z_STRVAL_P(delim); delim_len = Z_STRLEN_P(delim); /* the first pass: calculate size of memory to allocate */ i = 0; result_len = 0; zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) &tmp, &pos) == SUCCESS) { SEPARATE_ZVAL(tmp); convert_to_string(*tmp); str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */ len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */ result_len += len_arr[i]; i++; if (delim_len && i < numelems) result_len += delim_len; zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos); } if (result_len == 0) { efree(len_arr); efree(str_arr); RETURN_EMPTY_STRING(); } /* allocate memory */ ptr = result_str = emalloc(result_len + 1); if (ptr == NULL) { efree(len_arr); efree(str_arr); TSRMLS_FETCH(); php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); RETURN_FALSE; } /* the second pass: implode array into allocated memory */ for (i = 0; i < numelems - 1; i++) { if (tmp_len = len_arr[i]) { memcpy(ptr, str_arr[i], tmp_len); ptr += tmp_len; } if (delim_len) { memcpy(ptr, delim_str, delim_len); ptr += delim_len; } } /* copy the last element of the array */ if (tmp_len = len_arr[i]) { memcpy(ptr, str_arr[i], tmp_len); ptr += tmp_len; } *ptr = '\0'; efree(len_arr); efree(str_arr); RETURN_STRINGL(result_str, result_len, 0); } /***********************************************/ int main(int argc,char *argv[]) { zval result; php_implode(&delim, arr, &result); printf("str=[%s], len=%lu", Z_STRVAL_P(&result), Z_STRLEN_P(&result)); efree(result.p); return 0; } =========cut======== unified diff: =========cut========= --- string.c Thu May 13 20:44:32 2004 +++ string_implode.c Mon Jun 14 13:28:19 2004 @@ -827,31 +827,75 @@ { zval **tmp; HashPosition pos; - smart_str implstr = {0}; - int numelems, i = 0; + int numelems, i; + size_t result_len, delim_len, tmp_len; + char *result_str, *delim_str, *ptr; + char **str_arr; + size_t *len_arr; numelems = zend_hash_num_elements(Z_ARRVAL_P(arr)); - - if(numelems == 0) { - RETURN_EMPTY_STRING(); + if (numelems < 1) RETURN_EMPTY_STRING(); + str_arr = (char **) emalloc(numelems * sizeof(char *)); + len_arr = (size_t *) emalloc(numelems * sizeof(size_t)); + if (str_arr == NULL || len_arr == NULL) { + TSRMLS_FETCH(); + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); + RETURN_FALSE; } + delim_str = Z_STRVAL_P(delim); + delim_len = Z_STRLEN_P(delim); + /* the first pass: calculate size of memory to allocate */ + i = 0; + result_len = 0; zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) &tmp, &pos) == SUCCESS) { SEPARATE_ZVAL(tmp); convert_to_string(*tmp); - - smart_str_appendl(&implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp)); - if (++i != numelems) { - smart_str_appendl(&implstr, Z_STRVAL_P(delim), Z_STRLEN_P(delim)); - } + str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */ + len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */ + result_len += len_arr[i]; + i++; + if (delim_len && i < numelems) result_len += delim_len; zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos); } - smart_str_0(&implstr); - - RETURN_STRINGL(implstr.c, implstr.len, 0); + if (result_len == 0) { + efree(len_arr); + efree(str_arr); + RETURN_EMPTY_STRING(); + } + /* allocate memory */ + ptr = result_str = emalloc(result_len + 1); + if (ptr == NULL) { + efree(len_arr); + efree(str_arr); + TSRMLS_FETCH(); + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); + RETURN_FALSE; + } + /* the second pass: implode array into allocated memory */ + for (i = 0; i < numelems - 1; i++) { + if (tmp_len = len_arr[i]) { + memcpy(ptr, str_arr[i], tmp_len); + ptr += tmp_len; + } + if (delim_len) { + memcpy(ptr, delim_str, delim_len); + ptr += delim_len; + } + } + /* copy the last element of the array */ + if (tmp_len = len_arr[i]) { + memcpy(ptr, str_arr[i], tmp_len); + ptr += tmp_len; + } + *ptr = '\0'; + + efree(len_arr); + efree(str_arr); + RETURN_STRINGL(result_str, result_len, 0); } /* }}} */ =========cut========= -- Using Opera's revolutionary e-mail client: http://www.opera.com/m2/