Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:3827 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 98460 invoked from network); 8 Aug 2003 12:35:16 -0000 Received: from unknown (HELO luzifer.megami.de) (80.132.89.41) by pb1.pair.com with SMTP; 8 Aug 2003 12:35:16 -0000 Received: (qmail 28200 invoked by uid 1001); 8 Aug 2003 12:35:14 -0000 Date: Fri, 8 Aug 2003 14:35:14 +0200 To: andrei@php.net Cc: internals@lists.php.net Message-ID: <20030808123514.GA27149@luzifer.megami.de> Reply-To: sica-php@baldur.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="r5Pyd7+fXNt84Ff3" Content-Disposition: inline User-Agent: Mutt/1.5.4i Subject: weird behaviour with ext/tokenizer and heredoc From: sica-php@baldur.org (Patrick Preuster) --r5Pyd7+fXNt84Ff3 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hello, when playing around with ext/tokenizer, I found some weird behaviour with the T_END_HEREDOC token: I got "FOO;;" instead of the expected "FOO" as the token. As a consequence, I wrote a patch for the Zend Engine that should fix ext/tokenizer and make the special handling of T_END_HEREDOC in zend_highlight.c unnecessary. I tested my changes thoroughly, but as I'm not entirely sure that I didn't break something, I also wrote a second patch for ext/tokenizer that fixes T_END_HEREDOC and leaves the Zend Engine untouched. -- Patrick Preuster sica-php@baldur.org, sica@FreeNode "Heaven doesn't want us and Hell is afraid we'll take over!" 
--r5Pyd7+fXNt84Ff3 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="ze.diff" --- zend_language_scanner.bak 2003-08-08 12:05:21.000000000 +0200 +++ zend_language_scanner.c 2003-08-08 12:05:56.000000000 +0200 @@ -4811,7 +4811,7 @@ zendlval->value.str.val = estrndup(yytext, yyleng); /* unput destroys yytext */ zendlval->value.str.len = yyleng; if (unput_semicolon) { - unput(';'); + yyless(label_len); } efree(CG(heredoc)); CG(heredoc)=NULL; --- zend_highlight.bak 2003-08-08 12:06:14.000000000 +0200 +++ zend_highlight.c 2003-08-08 12:07:06.000000000 +0200 @@ -155,14 +155,7 @@ zend_printf("", last_color); } } - switch (token_type) { - case T_END_HEREDOC: - zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC); - break; - default: - zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); - break; - } + zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); if (token.type == IS_STRING) { switch (token_type) { @@ -176,16 +169,8 @@ efree(token.value.str.val); break; } - } else if (token_type == T_END_HEREDOC) { - zend_bool has_semicolon=(strchr(token.value.str.val, ';')?1:0); - - efree(token.value.str.val); - if (has_semicolon) { - /* the following semicolon was unput(), ignore it */ - lex_scan(&token TSRMLS_CC); - } - } - token.type = 0; + } + token.type = 0; } if (last_color != syntax_highlighter_ini->highlight_html) { zend_printf("\n"); --r5Pyd7+fXNt84Ff3 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="tokenizer.diff" --- tokenizer.bak 2003-08-08 12:36:55.000000000 +0200 +++ tokenizer.c 2003-08-08 12:45:28.000000000 +0200 @@ -325,7 +325,7 @@ zval token; zval *keyword; int token_type; - zend_bool destroy; + zend_bool destroy, has_semicolon; array_init(return_value); @@ -342,7 +342,29 @@ break; } - if (token_type >= 256) { + if (token_type == T_END_HEREDOC) { + has_semicolon = (strchr(token.value.str.val, ';') ? 
1 : 0); + efree(token.value.str.val); + + if (has_semicolon) { + MAKE_STD_ZVAL(keyword); + array_init(keyword); + add_next_index_long(keyword, token_type); + add_next_index_stringl(keyword, zendtext, token.value.str.len-2, 1); + add_next_index_zval(return_value, keyword); + + lex_scan(&token TSRMLS_CC); + add_next_index_stringl(return_value, zendtext, zendleng, 1); + } + else { + MAKE_STD_ZVAL(keyword); + array_init(keyword); + add_next_index_long(keyword, token_type); + add_next_index_stringl(keyword, zendtext, zendleng, 1); + add_next_index_zval(return_value, keyword); + } + } + else if (token_type >= 256) { MAKE_STD_ZVAL(keyword); array_init(keyword); add_next_index_long(keyword, token_type); --r5Pyd7+fXNt84Ff3--