Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:50346 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 76329 invoked from network); 18 Nov 2010 14:27:05 -0000 Received: from unknown (HELO lists.php.net) (127.0.0.1) by localhost with SMTP; 18 Nov 2010 14:27:05 -0000 Authentication-Results: pb1.pair.com header.from=dmitry@zend.com; sender-id=pass Authentication-Results: pb1.pair.com smtp.mail=dmitry@zend.com; spf=pass; sender-id=pass Received-SPF: pass (pb1.pair.com: domain zend.com designates 212.25.124.185 as permitted sender) X-PHP-List-Original-Sender: dmitry@zend.com X-Host-Fingerprint: 212.25.124.185 il-mr1.zend.com Received: from [212.25.124.185] ([212.25.124.185:42079] helo=il-mr1.zend.com) by pb1.pair.com (ecelerity 2.1.1.9-wez r(12769M)) with ESMTP id 37/51-01108-5B735EC4 for ; Thu, 18 Nov 2010 09:27:04 -0500 Received: from il-gw1.zend.com (unknown [10.1.1.22]) by il-mr1.zend.com (Postfix) with ESMTP id 2A01B50518; Thu, 18 Nov 2010 16:21:24 +0200 (IST) Received: from ws.home (10.1.10.8) by il-ex2.zend.net (10.1.1.22) with Microsoft SMTP Server id 14.0.689.0; Thu, 18 Nov 2010 16:26:53 +0200 Message-ID: <4CE537B0.1030607@zend.com> Date: Thu, 18 Nov 2010 17:26:56 +0300 User-Agent: Thunderbird 2.0.0.23 (X11/20090825) MIME-Version: 1.0 To: PHP Internals , Moriyoshi Koizumi CC: Andi Gutmans , Zeev Suraski Content-Type: multipart/mixed; boundary="------------030504000004030200030404" Subject: --enable-zend-multibyte From: dmitry@zend.com (Dmitry Stogov) --------------030504000004030200030404 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit Hi, The proposed patch allows compiling PHP with --enable-zend-multibyte and then enabling or disabling multibyte support at run-time using zend.multibyte=0/1 in php.ini. 
As a result, a single binary will be able to support multibyte encodings or run without the zend-multibyte overhead, depending on the configuration. The patch doesn't affect PHP compiled without --enable-zend-multibyte. I'm going to commit it into trunk before alpha. Any objections? Thanks. Dmitry. --------------030504000004030200030404 Content-Type: text/plain; name="mb.diff.txt" Content-Disposition: inline; filename="mb.diff.txt" Content-Transfer-Encoding: quoted-printable Index: ext/standard/info.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- ext/standard/info.c (revision 305494) +++ ext/standard/info.c (working copy) @@ -760,7 +760,7 @@ php_info_print_table_row(2, "Zend Memory Manager", is_zend_mm(TSRMLS_C= ) ? "enabled" : "disabled" ); =20 #ifdef ZEND_MULTIBYTE - php_info_print_table_row(2, "Zend Multibyte Support", "enabled"); + php_info_print_table_row(2, "Zend Multibyte Support", CG(multibyte) ? 
= "enabled" : "disabled"); #else php_info_print_table_row(2, "Zend Multibyte Support", "disabled"); #endif Index: ext/mbstring/mbstring.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- ext/mbstring/mbstring.c (revision 305494) +++ ext/mbstring/mbstring.c (working copy) @@ -1132,6 +1132,9 @@ { int *list, size; =20 + if (!CG(multibyte)) { + return FAILURE; + } if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &siz= e, 1 TSRMLS_CC)) { if (MBSTRG(script_encoding_list) !=3D NULL) { free(MBSTRG(script_encoding_list)); @@ -1442,8 +1445,10 @@ PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif #ifdef ZEND_MULTIBYTE - zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(inter= nal_encoding)) TSRMLS_CC); - php_mb_set_zend_encoding(TSRMLS_C); + if (CG(multibyte)) { + zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(inte= rnal_encoding)) TSRMLS_CC); + php_mb_set_zend_encoding(TSRMLS_C); + } #endif /* ZEND_MULTIBYTE */ =20 return SUCCESS; @@ -1570,7 +1575,7 @@ MBSTRG(current_internal_encoding) =3D no_encoding; #ifdef ZEND_MULTIBYTE /* TODO: make independent from mbstring.encoding_translation? 
*/ - if (MBSTRG(encoding_translation)) { + if (CG(multibyte) && MBSTRG(encoding_translation)) { zend_multibyte_set_internal_encoding(name TSRMLS_CC); } #endif /* ZEND_MULTIBYTE */ Index: Zend/zend.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/zend.c (revision 305494) +++ Zend/zend.c (working copy) @@ -93,6 +93,7 @@ ZEND_INI_ENTRY("error_reporting", NULL, ZEND_INI_ALL, OnUpdateErro= rReporting) STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1", ZEND_INI_ALL, OnUpdateG= CEnabled, gc_enabled, zend_gc_globals, gc_globals) #ifdef ZEND_MULTIBYTE + STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, OnUpdateB= ool, multibyte, zend_compiler_globals, compiler_globals) STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool= , detect_unicode, zend_compiler_globals, compiler_globals) #endif ZEND_INI_END() Index: Zend/zend_language_scanner.l =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/zend_language_scanner.l (revision 305494) +++ Zend/zend_language_scanner.l (working copy) @@ -181,7 +181,7 @@ lex_state->filename =3D zend_get_compiled_filename(TSRMLS_C); lex_state->lineno =3D CG(zend_lineno); =20 -#ifdef ZEND_MULTIBYTE +#ifdef ZEND_MULTIBYTE=09 lex_state->script_org =3D SCNG(script_org); lex_state->script_org_size =3D SCNG(script_org_size); lex_state->script_filtered =3D SCNG(script_filtered); @@ -270,27 +270,32 @@ =20 if (size !=3D -1) { #ifdef ZEND_MULTIBYTE - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != =3D 0) { - return FAILURE; - } + if (CG(multibyte)) { + if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) = !=3D 0) { + return FAILURE; + } =20 - SCNG(yy_in) =3D NULL; + 
SCNG(yy_in) =3D NULL; =20 - zend_multibyte_set_filter(NULL TSRMLS_CC); + zend_multibyte_set_filter(NULL TSRMLS_CC); =20 - if (!SCNG(input_filter)) { - SCNG(script_filtered) =3D (unsigned char*)emalloc(SCNG(script_org_siz= e)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)= +1); - SCNG(script_filtered_size) =3D SCNG(script_org_size); + if (!SCNG(input_filter)) { + SCNG(script_filtered) =3D (unsigned char*)emalloc(SCNG(script_org_si= ze)+1); + memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size= )+1); + SCNG(script_filtered_size) =3D SCNG(script_org_size); + } else { + SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_siz= e), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + if (SCNG(script_filtered) =3D=3D NULL) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script = from the detected " + "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_enco= ding)->name); + } + } + SCNG(yy_start) =3D SCNG(script_filtered) - offset; + yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_si= ze) TSRMLS_CC); } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size= ), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) =3D=3D NULL) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script f= rom the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encod= ing)->name); - } + SCNG(yy_start) =3D buf - offset; + yy_scan_buffer(buf, size TSRMLS_CC); } - SCNG(yy_start) =3D SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_siz= e) TSRMLS_CC); #else /* !ZEND_MULTIBYTE */ SCNG(yy_start) =3D buf - offset; yy_scan_buffer(buf, size TSRMLS_CC); @@ -438,20 +443,24 @@ SCNG(yy_start) =3D NULL; =20 #ifdef ZEND_MULTIBYTE - SCNG(script_org) =3D (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) =3D str->value.str.len; + if (CG(multibyte)) { 
+ SCNG(script_org) =3D (unsigned char *)estrdup(str->value.str.val); + SCNG(script_org_size) =3D str->value.str.len; =20 - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); =20 - if (!SCNG(input_filter)) { - SCNG(script_filtered) =3D (unsigned char*)emalloc(SCNG(script_org_size= )+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+= 1); - SCNG(script_filtered_size) =3D SCNG(script_org_size); + if (!SCNG(input_filter)) { + SCNG(script_filtered) =3D (unsigned char*)emalloc(SCNG(script_org_siz= e)+1); + memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)= +1); + SCNG(script_filtered_size) =3D SCNG(script_org_size); + } else { + SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size= ), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + } + + yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_siz= e) TSRMLS_CC);=09 } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size)= , SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); } - - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size= ) TSRMLS_CC);=09 #else /* !ZEND_MULTIBYTE */ yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); #endif /* ZEND_MULTIBYTE */ Index: Zend/zend_compile.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/zend_compile.c (revision 305494) +++ Zend/zend_compile.c (working copy) @@ -149,14 +149,12 @@ =20 /* NULL, name length, filename length, last accepting char position len= gth */ result->value.str.len =3D 1+name_length+strlen(filename)+char_pos_len; -#ifdef ZEND_MULTIBYTE + /* must be binary safe */ result->value.str.val =3D (char *) 
safe_emalloc(result->value.str.len,= 1, 1); result->value.str.val[0] =3D '\0'; sprintf(result->value.str.val+1, "%s%s%s", name, filename, char_pos_bu= f); -#else - zend_spprintf(&result->value.str.val, 0, "%c%s%s%s", '\0', name, filena= me, char_pos_buf); -#endif /* ZEND_MULTIBYTE */ + result->type =3D IS_STRING; Z_SET_REFCOUNT_P(result, 1); } @@ -5861,51 +5859,53 @@ CG(declarables).ticks =3D val->u.constant; #ifdef ZEND_MULTIBYTE } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->= u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { - zend_encoding *new_encoding, *old_encoding; - zend_encoding_filter old_input_filter; + if (CG(multibyte)) { + zend_encoding *new_encoding, *old_encoding; + zend_encoding_filter old_input_filter; =20 - if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) =3D=3D IS_CONSTA= NT) { - zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding"); - } - - /* - * Check that the pragma comes before any opcodes. If the compilation - * got as far as this, the previous portion of the script must have be= en - * parseable according to the .ini script_encoding setting. We still - * want to tell them to put declare() at the top. - */ - { - int num =3D CG(active_op_array)->last; - /* ignore ZEND_EXT_STMT and ZEND_TICKS */ - while (num > 0 && - (CG(active_op_array)->opcodes[num-1].opcode =3D=3D ZEND_EXT_ST= MT || - CG(active_op_array)->opcodes[num-1].opcode =3D=3D ZEND_TICKS)= ) { - --num; + if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) =3D=3D IS_CONST= ANT) { + zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding"); } =20 - if (num > 0) { - zend_error(E_COMPILE_ERROR, "Encoding declaration pragma must be the= very first statement in the script"); + /* + * Check that the pragma comes before any opcodes. If the compilation + * got as far as this, the previous portion of the script must have b= een + * parseable according to the .ini script_encoding setting. 
We still + * want to tell them to put declare() at the top. + */ + { + int num =3D CG(active_op_array)->last; + /* ignore ZEND_EXT_STMT and ZEND_TICKS */ + while (num > 0 && + (CG(active_op_array)->opcodes[num-1].opcode =3D=3D ZEND_EXT_S= TMT || + CG(active_op_array)->opcodes[num-1].opcode =3D=3D ZEND_TICKS= )) { + --num; + } + + if (num > 0) { + zend_error(E_COMPILE_ERROR, "Encoding declaration pragma must be th= e very first statement in the script"); + } } - } - CG(encoding_declared) =3D 1; + CG(encoding_declared) =3D 1; =20 - convert_to_string(&val->u.constant); - new_encoding =3D zend_multibyte_fetch_encoding(val->u.constant.value.s= tr.val); - if (!new_encoding) { - zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.con= stant.value.str.val); - } else { - old_input_filter =3D LANG_SCNG(input_filter); - old_encoding =3D LANG_SCNG(script_encoding); - zend_multibyte_set_filter(new_encoding TSRMLS_CC); + convert_to_string(&val->u.constant); + new_encoding =3D zend_multibyte_fetch_encoding(val->u.constant.value.= str.val); + if (!new_encoding) { + zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.co= nstant.value.str.val); + } else { + old_input_filter =3D LANG_SCNG(input_filter); + old_encoding =3D LANG_SCNG(script_encoding); + zend_multibyte_set_filter(new_encoding TSRMLS_CC); =20 - /* need to re-scan if input filter changed */ - if (old_input_filter !=3D LANG_SCNG(input_filter) || - ((old_input_filter =3D=3D zend_multibyte_script_encoding_filter) && - (new_encoding !=3D old_encoding))) { - zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_C= C); + /* need to re-scan if input filter changed */ + if (old_input_filter !=3D LANG_SCNG(input_filter) || + ((old_input_filter =3D=3D zend_multibyte_script_encoding_filter) && + (new_encoding !=3D old_encoding))) { + zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_= CC); + } } } - efree(val->u.constant.value.str.val); + zval_dtor(&val->u.constant); #else /* 
!ZEND_MULTIBYTE */ } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->= u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { /* Do not generate any kind of warning for encoding declares */ Index: Zend/tests/multibyte/multibyte_encoding_002.phpt =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/tests/multibyte/multibyte_encoding_002.phpt (revision 305496) +++ Zend/tests/multibyte/multibyte_encoding_002.phpt (working copy) @@ -10,6 +10,7 @@ } ?> --INI-- +zend.multibyte=3D1 mbstring.internal_encoding=3Diso-8859-1 --FILE-- =EF>=BF --INI-- +zend.multibyte=3D1 mbstring.internal_encoding=3Diso-8859-1 --FILE-- =FF=FE< Index: Zend/tests/multibyte/multibyte_encoding_004.phpt =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/tests/multibyte/multibyte_encoding_004.phpt (revision 305496) +++ Zend/tests/multibyte/multibyte_encoding_004.phpt (working copy) @@ -10,6 +10,7 @@ } ?> --INI-- +zend.multibyte=3D1 mbstring.script_encoding=3DShift_JIS mbstring.internal_encoding=3DShift_JIS --FILE-- Index: Zend/tests/multibyte/multibyte_encoding_005.phpt =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/tests/multibyte/multibyte_encoding_005.phpt (revision 305496) +++ Zend/tests/multibyte/multibyte_encoding_005.phpt (working copy) @@ -10,6 +10,7 @@ } ?> --INI-- +zend.multibyte=3D1 mbstring.encoding_translation =3D On mbstring.script_encoding=3DShift_JIS mbstring.internal_encoding=3DUTF-8 Index: Zend/tests/multibyte/multibyte_encoding_001.phpt 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- Zend/tests/multibyte/multibyte_encoding_001.phpt (revision 305496) +++ Zend/tests/multibyte/multibyte_encoding_001.phpt (working copy) @@ -10,6 +10,7 @@ } ?> --INI-- +zend.multibyte=3D1 mbstring.internal_encoding=3DSJIS --FILE--