The implode()
function uses very bad algorithm, which uses memory
reallocation very often. The speed of functions falls quickly with
increase of length of imploded array & data in it.
My version of implode()
uses much better algorithm:
- counts the amount of memory needed to result string
- allocates memory
- implodes array to the allocated memory
Below is standalone test application and unified diff:
the test:
=========cut========
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* faked PHP API, which has been used in php_implode() /
#define PHPAPI
#define HashPosition unsigned int
#define Z_ARRVAL_P(a) (a)
#define Z_STRLEN_P(a) ((a)->len)
#define Z_STRVAL_P(a) ((a)->p)
#define Z_STRLEN_PP(pp) Z_STRLEN_P((pp))
#define Z_STRVAL_PP(pp) Z_STRVAL_P(*(pp))
#define RETURN_STRINGL(str, str_len, foo)
{
zval *q = return_value;
q->p = (str);
q->len = (str_len);
return;
}
#define RETURN_EMPTY_STRING()
{
zval *q = return_value;
q->p = emalloc(1);
*(q->p) = '\0';
q->len = 0;
return;
}
#define RETURN_FALSE RETURN_EMPTY_STRING()
#define zend_hash_internal_pointer_reset_ex(foo, pos) *(pos) = 0
#define SUCCESS 1
#define FAILED 0
#define SEPARATE_ZVAL(foo)
#define convert_to_string(foo)
#define zend_hash_move_forward_ex(foo, pos) (*pos)++
#define emalloc(len) malloc(len)
#define efree(p) free(p)
#define TSRMLS_FETCH()
#define TSRMLS_CC
#define php_error_docref(foo, bar, s) printf("Error: [%s]\n", s)
typedef struct {
char p; / pointer to a data /
size_t len; / length of a data */
} zval;
int zend_hash_num_elements(zval *a) {
int i = 0;
while (a[i].p != NULL) i++;
return i;
}
int zend_hash_get_current_data_ex(zval *arr, void **z_ptr, HashPosition
*pos) {
static zval *z, **zp;
zp = &z;
z = arr + *pos;
*z_ptr = zp;
return z->p == NULL
? FAILED : SUCCESS;
}
/*****************************************/
/ test array */
zval arr[] = {
{"foo", 3},
{"bar", 3},
{"baz", 3},
{NULL, 0}
};
/* delimiter /
zval delim = {":", 1};
/*****************************************/
PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value)
{
zval **tmp;
HashPosition pos;
int numelems, i;
size_t result_len, delim_len, tmp_len;
char *result_str, *delim_str, *ptr;
char **str_arr;
size_t *len_arr;
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr));
if (numelems < 1) RETURN_EMPTY_STRING();
str_arr = (char **) emalloc(numelems * sizeof(char *));
len_arr = (size_t *) emalloc(numelems * sizeof(size_t));
if (str_arr == `NULL` || len_arr == NULL) {
TSRMLS_FETCH();
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
RETURN_FALSE;
}
delim_str = Z_STRVAL_P(delim);
delim_len = Z_STRLEN_P(delim);
/* the first pass: calculate size of memory to allocate */
i = 0;
result_len = 0;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr),
(void **) &tmp,
&pos) == SUCCESS) {
SEPARATE_ZVAL(tmp);
convert_to_string(*tmp);
str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */
len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */
result_len += len_arr[i];
i++;
if (delim_len && i < numelems) result_len += delim_len;
zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos);
}
if (result_len == 0) {
efree(len_arr);
efree(str_arr);
RETURN_EMPTY_STRING();
}
/* allocate memory */
ptr = result_str = emalloc(result_len + 1);
if (ptr == NULL) {
efree(len_arr);
efree(str_arr);
TSRMLS_FETCH();
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
RETURN_FALSE;
}
/* the second pass: implode array into allocated memory */
for (i = 0; i < numelems - 1; i++) {
if (tmp_len = len_arr[i]) {
memcpy(ptr, str_arr[i], tmp_len);
ptr += tmp_len;
}
if (delim_len) {
memcpy(ptr, delim_str, delim_len);
ptr += delim_len;
}
}
/* copy the last element of the array */
if (tmp_len = len_arr[i]) {
memcpy(ptr, str_arr[i], tmp_len);
ptr += tmp_len;
}
*ptr = '\0';
efree(len_arr);
efree(str_arr);
RETURN_STRINGL(result_str, result_len, 0);
}
/***********************************************/
int main(int argc,char *argv[])
{
zval result;
php_implode(&delim, arr, &result);
printf("str=[%s], len=%lu", Z_STRVAL_P(&result), Z_STRLEN_P(&result));
efree(result.p);
return 0;
}
=========cut========
unified diff:
=========cut=========
--- string.c Thu May 13 20:44:32 2004
+++ string_implode.c Mon Jun 14 13:28:19 2004
@@ -827,31 +827,75 @@
{
zval **tmp;
HashPosition pos;
- smart_str implstr = {0};
- int numelems, i = 0;
-
int numelems, i;
-
size_t result_len, delim_len, tmp_len;
-
char *result_str, *delim_str, *ptr;
-
char **str_arr;
-
size_t *len_arr;
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr));
- if(numelems == 0) {
-
RETURN_EMPTY_STRING();
-
if (numelems < 1) RETURN_EMPTY_STRING();
-
str_arr = (char **) emalloc(numelems * sizeof(char *));
-
len_arr = (size_t *) emalloc(numelems * sizeof(size_t));
-
if (str_arr ==
NULL
|| len_arr == NULL) { -
TSRMLS_FETCH();
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
-
RETURN_FALSE;
}
-
delim_str = Z_STRVAL_P(delim);
-
delim_len = Z_STRLEN_P(delim);
-
/* the first pass: calculate size of memory to allocate */
-
i = 0;
-
result_len = 0;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr),
(void **) &tmp,
&pos) == SUCCESS) {
SEPARATE_ZVAL(tmp);
convert_to_string(*tmp);
-
smart_str_appendl(&implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp));
-
if (++i != numelems) {
-
smart_str_appendl(&implstr, Z_STRVAL_P(delim),
Z_STRLEN_P(delim));
-
}
-
str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */
-
len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */
-
result_len += len_arr[i];
-
i++;
-
}if (delim_len && i < numelems) result_len += delim_len; zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos);
- smart_str_0(&implstr);
- RETURN_STRINGL(implstr.c, implstr.len, 0);
- if (result_len == 0) {
-
efree(len_arr);
-
efree(str_arr);
-
RETURN_EMPTY_STRING();
- }
- /* allocate memory */
- ptr = result_str = emalloc(result_len + 1);
- if (ptr == NULL) {
-
efree(len_arr);
-
efree(str_arr);
-
TSRMLS_FETCH();
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
-
RETURN_FALSE;
- }
- /* the second pass: implode array into allocated memory */
- for (i = 0; i < numelems - 1; i++) {
-
if (tmp_len = len_arr[i]) {
-
memcpy(ptr, str_arr[i], tmp_len);
-
ptr += tmp_len;
-
}
-
if (delim_len) {
-
memcpy(ptr, delim_str, delim_len);
-
ptr += delim_len;
-
}
- }
- /* copy the last element of the array */
- if (tmp_len = len_arr[i]) {
-
memcpy(ptr, str_arr[i], tmp_len);
-
ptr += tmp_len;
- }
- *ptr = '\0';
- efree(len_arr);
- efree(str_arr);
- RETURN_STRINGL(result_str, result_len, 0);
}
/* }}} */
=========cut=========
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
I've read CODING_STANDARDS (thanks to Christian Schneider) and corrected
my code according to your standards.
Also I've improved speed of algorithm. Now it is not duplicate the
string value of array item, if it is has string type already:
if (Z_TYPE_PP(tmp) != IS_STRING) {
/* create new instance of a var, then convert it to a string */
SEPARATE_ZVAL(tmp);
convert_to_string(*tmp);
}
unified diff:
=========cut===========
--- string.c Thu May 13 20:44:32 2004
+++ string_implode.c Mon Jun 14 14:28:29 2004
@@ -827,31 +827,84 @@
{
zval **tmp;
HashPosition pos;
- smart_str implstr = {0};
- int numelems, i = 0;
-
int numelems, i;
-
size_t result_len, delim_len, tmp_len;
-
char *result_str, *delim_str, *ptr;
-
char **str_arr;
-
size_t *len_arr;
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr));
- if(numelems == 0) {
-
RETURN_EMPTY_STRING();
-
if (numelems < 1) RETURN_EMPTY_STRING();
-
str_arr = (char **) emalloc(numelems * sizeof(char *));
-
len_arr = (size_t *) emalloc(numelems * sizeof(size_t));
-
if (str_arr ==
NULL
|| len_arr == NULL) { -
TSRMLS_FETCH();
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
-
RETURN_FALSE;
}
-
delim_str = Z_STRVAL_P(delim);
-
delim_len = Z_STRLEN_P(delim);
-
/* the first pass: calculate size of memory to allocate */
-
i = 0;
-
result_len = 0;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr),
(void **) &tmp,
&pos) == SUCCESS) { -
if (Z_TYPE_PP(tmp) != IS_STRING) {
-
/* create new instance of a var, then convert it to a string
*/
SEPARATE_ZVAL(tmp);
convert_to_string(*tmp);
-
smart_str_appendl(&implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp));
-
if (++i != numelems) {
-
smart_str_appendl(&implstr, Z_STRVAL_P(delim),
Z_STRLEN_P(delim));
}
-
str_arr[i] = Z_STRVAL_PP(tmp); /* save pointer to string for the
second pass (see below) */
-
len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */
-
result_len += len_arr[i];
-
i++;
-
}if (delim_len && i < numelems) result_len += delim_len; zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos);
- smart_str_0(&implstr);
- if (result_len == 0) {
-
/* result_len could be 0 in two cases:
-
1) both delimiter & array values have nul length
-
2) array contains one element with nul length
-
*/
-
efree(len_arr);
-
efree(str_arr);
-
RETURN_EMPTY_STRING();
- }
- /* allocate memory */
- ptr = result_str = emalloc(result_len + 1);
- if (ptr == NULL) {
-
efree(len_arr);
-
efree(str_arr);
-
TSRMLS_FETCH();
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
-
RETURN_FALSE;
- }
- RETURN_STRINGL(implstr.c, implstr.len, 0);
- /* the second pass: implode array into allocated memory */
- for (i = 0; i < numelems - 1; i++) {
-
if (tmp_len = len_arr[i]) {
-
memcpy(ptr, str_arr[i], tmp_len);
-
ptr += tmp_len;
-
}
-
if (delim_len) {
-
memcpy(ptr, delim_str, delim_len);
-
ptr += delim_len;
-
}
- }
- /* copy the last element of the array */
- if (tmp_len = len_arr[i]) {
-
memcpy(ptr, str_arr[i], tmp_len);
-
ptr += tmp_len;
- }
- *ptr = '\0';
- efree(len_arr);
- efree(str_arr);
- RETURN_STRINGL(result_str, result_len, 0);
}
/* }}} */
=========cut===========
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/