Newsgroups: php.internals Path: news.php.net Xref: news.php.net php.internals:43700 Return-Path: Mailing-List: contact internals-help@lists.php.net; run by ezmlm Delivered-To: mailing list internals@lists.php.net Received: (qmail 47728 invoked from network); 14 Apr 2009 14:59:35 -0000 Received: from unknown (HELO lists.php.net) (127.0.0.1) by localhost with SMTP; 14 Apr 2009 14:59:35 -0000 Authentication-Results: pb1.pair.com header.from=dmitry@zend.com; sender-id=pass Authentication-Results: pb1.pair.com smtp.mail=dmitry@zend.com; spf=pass; sender-id=pass Received-SPF: pass (pb1.pair.com: domain zend.com designates 212.25.124.185 as permitted sender) X-PHP-List-Original-Sender: dmitry@zend.com X-Host-Fingerprint: 212.25.124.185 il-mr1.zend.com Received: from [212.25.124.185] ([212.25.124.185:47338] helo=il-mr1.zend.com) by pb1.pair.com (ecelerity 2.1.1.9-wez r(12769M)) with ESMTP id 96/00-47392-4D4A4E94 for ; Tue, 14 Apr 2009 10:59:35 -0400 Received: from il-gw1.zend.com (unknown [10.1.1.21]) by il-mr1.zend.com (Postfix) with ESMTP id 4758B50361; Tue, 14 Apr 2009 18:22:57 +0300 (IDT) Received: from ws.home ([10.1.10.30]) by il-gw1.zend.com with Microsoft SMTPSVC(6.0.3790.3959); Tue, 14 Apr 2009 17:59:13 +0300 Message-ID: <49E4A4CD.5010809@zend.com> Date: Tue, 14 Apr 2009 18:59:25 +0400 User-Agent: Thunderbird 2.0.0.21 (X11/20090320) MIME-Version: 1.0 To: PHP Internals , Lukas Kahwe Smith , =?ISO-8859-1?Q?Johannes_Schl=FCter?= , Stas Malyshev , Andi Gutmans , Sara Golemon Content-Type: multipart/mixed; boundary="------------080209010600050304080503" X-OriginalArrivalTime: 14 Apr 2009 14:59:14.0010 (UTC) FILETIME=[934C13A0:01C9BD11] Subject: Support for "Transfer-Encoding: chunked" in http stream wrapper From: dmitry@zend.com (Dmitry Stogov) --------------080209010600050304080503 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi, The attached patch implements automatic decoding of chunked transfer-encoding. 
It fixes http://bugs.php.net/bug.php?id=47021 but also affects all PHP stream functions (e.g. file_get_contents("http://...");). Some PHP applications which check for the Transfer-Encoding HTTP header and perform manual decoding might be broken. Any objections against committing the patch into PHP_5_3? Maybe someone has ideas about patch improvements? Thanks. Dmitry. --------------080209010600050304080503 Content-Type: text/plain; name="chunked.diff.txt" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="chunked.diff.txt" Index: ext/standard/http_fopen_wrapper.c =================================================================== RCS file: /repository/php-src/ext/standard/http_fopen_wrapper.c,v retrieving revision 1.99.2.12.2.9.2.12 diff -u -p -d -r1.99.2.12.2.9.2.12 http_fopen_wrapper.c --- ext/standard/http_fopen_wrapper.c 31 Dec 2008 11:15:45 -0000 1.99.2.12.2.9.2.12 +++ ext/standard/http_fopen_wrapper.c 14 Apr 2009 14:40:12 -0000 @@ -84,6 +84,8 @@ #define HTTP_WRAPPER_HEADER_INIT 1 #define HTTP_WRAPPER_REDIRECTED 2 +static void php_add_chunked_filter(php_stream *stream TSRMLS_DC); + php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path, char *mode, int options, char **opened_path, php_stream_context *context, int redirect_max, int flags STREAMS_DC TSRMLS_DC) { php_stream *stream = NULL; @@ -111,6 +113,7 @@ php_stream *php_stream_url_wrap_http_ex( char *user_headers = NULL; int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0); int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0); + int chunked = 0; tmp_line[0] = '\0'; @@ -597,6 +600,8 @@ php_stream *php_stream_url_wrap_http_ex( } else if (!strncasecmp(http_header_line, "Content-Length: ", 16)) { file_size = atoi(http_header_line + 16); php_stream_notify_file_size(context, file_size, http_header_line, 0); + } else if (!strncasecmp(http_header_line, "Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) { + chunked = 1; } if (http_header_line[0] == 
'\0') { @@ -740,6 +745,9 @@ out: * the stream */ stream->position = 0; + if (chunked) { + php_add_chunked_filter(stream TSRMLS_CC); + } } return stream; @@ -780,6 +788,193 @@ PHPAPI php_stream_wrapper php_stream_htt 1 /* is_url */ }; +typedef enum _php_chunked_filter_state { + CHUNKED_HEADER, + CHUNKED_HEADER_1, + CHUNKED_HEADER_2, + CHUNKED_HEADER_3, + CHUNKED_HEADER_R, + CHUNKED_HEADER_N, + CHUNKED_BODY, + CHUNKED_BODY_R, + CHUNKED_BODY_N, + CHUNKED_ERROR +} php_chunked_filter_state; + +typedef struct _php_chunked_filter_data { + php_chunked_filter_state state; + int chunk_size; + int persistent; +} php_chunked_filter_data; + +static int php_dechunk(char *buf, int len, php_chunked_filter_data *data) +{ + char *p = buf; + char *end = p + len; + char *out = buf; + char *out_len = 0; + + while (p < end) { + switch (data->state) { + case CHUNKED_HEADER: + data->chunk_size = 0; + case CHUNKED_HEADER_1: + case CHUNKED_HEADER_2: + case CHUNKED_HEADER_3: + while (p < end && data->state < CHUNKED_HEADER_R) { + if (*p >= '0' && *p <= '9') { + data->chunk_size = (data->chunk_size * 16) + (*p - '0'); + } else if (*p >= 'A' && *p <= 'F') { + data->chunk_size = (data->chunk_size * 16) + (*p - 'A' + 10); + } else if (*p >= 'a' && *p <= 'f') { + data->chunk_size = (data->chunk_size * 16) + (*p - 'a' + 10); + } else { + break; + } + p++; + data->state++; + } + if (data->state == CHUNKED_HEADER) { + /* not a hex number */ + data->state = CHUNKED_ERROR; + continue; + } else if (p == end) { + data->state = CHUNKED_HEADER_R; + return out_len; + } + case CHUNKED_HEADER_R: + if (*p == '\r') { + p++; + if (p == end) { + data->state = CHUNKED_HEADER_N; + return out_len; + } + } + case CHUNKED_HEADER_N: + if (*p == '\n') { + p++; + if (data->chunk_size == 0) { + /* EOF */ + data->state = CHUNKED_ERROR; + continue; + } else if (p == end) { + data->state = CHUNKED_BODY; + return out_len; + } + } else { + data->state = CHUNKED_ERROR; + continue; + } + case CHUNKED_BODY: + if (end - p >= 
data->chunk_size) { + if (p != out) { + memmove(out, p, data->chunk_size); + } + out += data->chunk_size; + out_len += data->chunk_size; + p += data->chunk_size; + if (p == end) { + data->state = CHUNKED_BODY_R; + return out_len; + } + } else { + if (p != out) { + memmove(out, p, end - p); + } + out_len += end - p; + return out_len; + } + case CHUNKED_BODY_R: + if (*p == '\r') { + p++; + if (p == end) { + data->state = CHUNKED_BODY_N; + return out_len; + } + } + case CHUNKED_BODY_N: + if (*p == '\n') { + p++; + data->state = CHUNKED_HEADER; + continue; + } else { + data->state = CHUNKED_ERROR; + continue; + } + case CHUNKED_ERROR: + if (p != out) { + memmove(out, p, end - p); + } + out_len += end - p; + return out_len; + } + } + return out_len; +} + +static php_stream_filter_status_t php_chunked_filter( + php_stream *stream, + php_stream_filter *thisfilter, + php_stream_bucket_brigade *buckets_in, + php_stream_bucket_brigade *buckets_out, + size_t *bytes_consumed, + int flags + TSRMLS_DC) +{ + php_stream_bucket *bucket; + size_t consumed = 0; + php_chunked_filter_data *data = (php_chunked_filter_data *) thisfilter->abstract; + char *buf; + int len; + + while (buckets_in->head) { + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + consumed += bucket->buflen; + + bucket->buflen = php_dechunk(bucket->buf, bucket->buflen, data); + + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + } + + if (bytes_consumed) { + *bytes_consumed = consumed; + } + + return PSFS_PASS_ON; +} + +static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC) +{ + if (thisfilter && thisfilter->abstract) { + php_chunked_filter_data *data = (php_chunked_filter_data *) thisfilter->abstract; + pefree(data, data->persistent); + } +} + +static php_stream_filter_ops php_chunked_ops = { + php_chunked_filter, + php_chunked_dtor, + "chunked" +}; + +static void php_add_chunked_filter(php_stream *stream TSRMLS_DC) +{ + int persistent = 
php_stream_is_persistent(stream); + php_chunked_filter_data *data = pemalloc(sizeof(php_chunked_filter_data), persistent); + php_stream_filter *temp_filter = php_stream_filter_alloc(&php_chunked_ops, data, persistent); + + if (temp_filter) { + data->state = CHUNKED_HEADER; + data->chunk_size = 0; + data->persistent = persistent; + php_stream_filter_append(&stream->readfilters, temp_filter); + } else { + pefree(data, persistent); + } +} + + + /* * Local variables: * tab-width: 4 --------------080209010600050304080503--