When I try to print huge files (greater than 500Mb) using the readfile()
function, my computer crashes.
Why? I found in the PHP 4.3.7 sources the file /main/streams.c and the function
_php_stream_passthru() in it. readfile() uses this function to print the
content of the file.
Below you can see source of the function with my comments:
/*
 * _php_stream_passthru() as found in PHP 4.3.7, annotated with the
 * poster's "!!!" remarks.
 *
 * Writes the contents of `stream` to the output with PHPWRITE and returns
 * the number of bytes written.  When HAVE_MMAP is defined and the stream is
 * not a socket, has no filters, is positioned at offset 0, can be cast to a
 * file descriptor and is larger than sizeof(buf), the whole file is
 * mmap()ed and written in a single call.  Otherwise (or if mmap() fails)
 * the stream is copied through the 8192-byte buffer.
 */
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC)
{
    size_t bcount = 0;
    int ready = 0;
    char buf[8192];
#ifdef HAVE_MMAP
    int fd;
#endif

#ifdef HAVE_MMAP
    if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
            && stream->filterhead == NULL
            && php_stream_tell(stream) == 0
            && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void*)&fd, 0))
    {
        struct stat sbuf;
        off_t off; /* !!! offset == 0 (see above condition
                      [php_stream_tell(stream) == 0] ) */
        void *p;
        size_t len;

        fstat(fd, &sbuf); /* !!! there is no error check after this line */

        if (sbuf.st_size > sizeof(buf)) {
            off = php_stream_tell(stream); /* !!! offset == 0 (see above) */
            len = sbuf.st_size - off;
            /* suppose len > 1 Gb, machine has 128Mb RAM and 128Mb swap.
               What happens after the next line? */
            p = mmap(0, len, PROT_READ, MAP_SHARED, fd, off); /* !!! why
                      MAP_SHARED, not MAP_PRIVATE ?
                      First parameter of the mmap is (void *) type,
                      not (int) */
            if (p != (void *) MAP_FAILED) {
                BG(mmap_file) = p; /* !!! what sense of this and next
                                      string? Thread safety?
                                      I don't understand how it works here */
                BG(mmap_len) = len;
                PHPWRITE(p, len);
                BG(mmap_file) = NULL; /* !!! thread safety? ok. why there
                                         is not BG(mmap_len) = 0 on the next line ? */
                munmap(p, len); /* !!! missing error check after munmap */
                bcount += len;
                ready = 1;
            }
        }
    }
#endif
    if(!ready) {
        int b;

        /* buffered fallback: copy the stream in sizeof(buf) chunks */
        while ((b = php_stream_read(stream, buf, sizeof(buf))) > 0) {
            PHPWRITE(buf, b);
            bcount += b;
        }
    }
    return bcount;
}
And here you can see my version of the function:
===================================================================
/*
 * Proposed replacement for _php_stream_passthru().
 *
 * Writes the contents of `stream` to the output with PHPWRITE and returns
 * the number of bytes written.  When HAVE_MMAP is defined and the stream is
 * not a socket, has no filters, is positioned at offset 0 and can be cast
 * to a file descriptor, the file is mapped and written in pieces of at most
 * 1024 * 1024 bytes instead of being mapped in one piece.  Any other stream
 * is copied through the 8192-byte buffer.
 */
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC)
{
    size_t bcount = 0; /* counter of printed out bytes */
    int is_mapped = 0;
    char buf[8192];
    size_t buf_len = sizeof(buf);
#ifdef HAVE_MMAP
    int fd;

    if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
            && stream->filterhead == NULL
            && php_stream_tell(stream) == 0
            && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void*)&fd, 0))
    {
        is_mapped = 1;
        buf_len = 1024 * 1024; /* default length of the mapped memory */
        struct stat sbuf;
        void *p; /* pointer to the mapped part of file */
        size_t len;
        /* get the length of local file connected to descriptor fd */
        fstat(fd, &sbuf);
        if (errno) {
            /* cannot get length of file */
            php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length of the file");
            return bcount;
        }
        len = (size_t) sbuf.st_size;
        /* print to the output buffer file contents */
        while (bcount < len) {
            /* the last piece may be shorter than the default chunk */
            if (len - bcount < buf_len) buf_len = len - bcount;
            p = mmap(NULL, buf_len, PROT_READ, MAP_PRIVATE, fd, (off_t) bcount); /* try to map part of the file to memory */
            if (p == (void *) MAP_FAILED) {
                /* error when mapping part of the file to memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot map part of the file to memory");
                break;
            }
            PHPWRITE(p, buf_len);
            munmap(p, buf_len); /* try to unmap allocated memory */
            if (errno) {
                /* error when unmapping memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot unmap allocated memory");
                break;
            }
            bcount += buf_len;
        }
    }
#endif
    if (!is_mapped) {
        /* print to the output buffer stream contents */
        while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) > 0) {
            PHPWRITE(buf, buf_len);
            bcount += buf_len;
        }
    }
    return bcount;
}
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Can you send a unified diff?
At 02:08 PM 6/7/2004 +0300, Alexander Valyalkin wrote:
When I try to print huge files (greater than 500Mb) using the readfile()
function, my computer crashes.
Why? I found in the PHP 4.3.7 sources the file /main/streams.c and the function
_php_stream_passthru() in it. readfile() uses this function to print the
content of the file.
Below you can see source of the function with my comments:
/*
 * _php_stream_passthru() as found in PHP 4.3.7, annotated with the
 * poster's "!!!" remarks.
 *
 * Writes the contents of `stream` to the output with PHPWRITE and returns
 * the number of bytes written.  When HAVE_MMAP is defined and the stream is
 * not a socket, has no filters, is positioned at offset 0, can be cast to a
 * file descriptor and is larger than sizeof(buf), the whole file is
 * mmap()ed and written in a single call.  Otherwise (or if mmap() fails)
 * the stream is copied through the 8192-byte buffer.
 */
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC)
{
    size_t bcount = 0;
    int ready = 0;
    char buf[8192];
#ifdef HAVE_MMAP
    int fd;
#endif

#ifdef HAVE_MMAP
    if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
            && stream->filterhead == NULL
            && php_stream_tell(stream) == 0
            && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void*)&fd, 0))
    {
        struct stat sbuf;
        off_t off; /* !!! offset == 0 (see above condition
                      [php_stream_tell(stream) == 0] ) */
        void *p;
        size_t len;

        fstat(fd, &sbuf); /* !!! there is no error check after this line */

        if (sbuf.st_size > sizeof(buf)) {
            off = php_stream_tell(stream); /* !!! offset == 0 (see above) */
            len = sbuf.st_size - off;
            /* suppose len > 1 Gb, machine has 128Mb RAM and 128Mb swap.
               What happens after the next line? */
            p = mmap(0, len, PROT_READ, MAP_SHARED, fd, off); /* !!! why
                      MAP_SHARED, not MAP_PRIVATE ?
                      First parameter of the mmap is (void *) type,
                      not (int) */
            if (p != (void *) MAP_FAILED) {
                BG(mmap_file) = p; /* !!! what sense of this and next
                                      string? Thread safety?
                                      I don't understand how it works here */
                BG(mmap_len) = len;
                PHPWRITE(p, len);
                BG(mmap_file) = NULL; /* !!! thread safety? ok. why there
                                         is not BG(mmap_len) = 0 on the next line ? */
                munmap(p, len); /* !!! missing error check after munmap */
                bcount += len;
                ready = 1;
            }
        }
    }
#endif
    if(!ready) {
        int b;

        /* buffered fallback: copy the stream in sizeof(buf) chunks */
        while ((b = php_stream_read(stream, buf, sizeof(buf))) > 0) {
            PHPWRITE(buf, b);
            bcount += b;
        }
    }
    return bcount;
}
And here you can see my version of the function:
===================================================================
/*
 * Proposed replacement for _php_stream_passthru().
 *
 * Writes the contents of `stream` to the output with PHPWRITE and returns
 * the number of bytes written.  When HAVE_MMAP is defined and the stream is
 * not a socket, has no filters, is positioned at offset 0 and can be cast
 * to a file descriptor, the file is mapped and written in pieces of at most
 * 1024 * 1024 bytes instead of being mapped in one piece.  Any other stream
 * is copied through the 8192-byte buffer.
 */
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC)
{
    size_t bcount = 0; /* counter of printed out bytes */
    int is_mapped = 0;
    char buf[8192];
    size_t buf_len = sizeof(buf);
#ifdef HAVE_MMAP
    int fd;

    if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
            && stream->filterhead == NULL
            && php_stream_tell(stream) == 0
            && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void*)&fd, 0))
    {
        is_mapped = 1;
        buf_len = 1024 * 1024; /* default length of the mapped memory */
        struct stat sbuf;
        void *p; /* pointer to the mapped part of file */
        size_t len;
        /* get the length of local file connected to descriptor fd */
        fstat(fd, &sbuf);
        if (errno) {
            /* cannot get length of file */
            php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length of the file");
            return bcount;
        }
        len = (size_t) sbuf.st_size;
        /* print to the output buffer file contents */
        while (bcount < len) {
            /* the last piece may be shorter than the default chunk */
            if (len - bcount < buf_len) buf_len = len - bcount;
            p = mmap(NULL, buf_len, PROT_READ, MAP_PRIVATE, fd, (off_t) bcount); /* try to map part of the file to memory */
            if (p == (void *) MAP_FAILED) {
                /* error when mapping part of the file to memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot map part of the file to memory");
                break;
            }
            PHPWRITE(p, buf_len);
            munmap(p, buf_len); /* try to unmap allocated memory */
            if (errno) {
                /* error when unmapping memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot unmap allocated memory");
                break;
            }
            bcount += buf_len;
        }
    }
#endif
    if (!is_mapped) {
        /* print to the output buffer stream contents */
        while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) > 0) {
            PHPWRITE(buf, buf_len);
            bcount += buf_len;
        }
    }
    return bcount;
}
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Can you send a unified diff?
At 02:08 PM 6/7/2004 +0300, Alexander Valyalkin wrote:
No, because I don't know how to create it :)
Please explain how I can create a unified diff under Windows.
I'm a newbie in PHP source development, but I have now decided to help you
improve the quality of the source code.
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
Can you send a unified diff?
==================cut=================
1065,1066c1065,1066
< size_t bcount = 0;
< int ready = 0;
size_t bcount = 0; /* counter of printed out bytes */ int is_mapped = 0;
1067a1068
size_t buf_len = sizeof(buf);
1070d1070
< #endif
1072d1071
< #ifdef HAVE_MMAP
1077a1077,1078
is_mapped = 1; buf_len = 1024 * 1024; /* default length of the mapped memory */
1079,1080c1080
< off_t off;
< void *p;
void *p; /* pinter to the mapped part of file */
1082c1082
<
/* get the length of local file connected to descriptor fd */
1084,1096c1084,1104
<
< if (sbuf.st_size > sizeof(buf)) {
< off = php_stream_tell(stream);
< len = sbuf.st_size - off;
< p = mmap(0, len, PROT_READ, MAP_SHARED, fd, off);
< if (p != (void *) MAP_FAILED) {
< BG(mmap_file) = p;
< BG(mmap_len) = len;
< PHPWRITE(p, len);
< BG(mmap_file) = NULL;
< munmap(p, len);
< bcount += len;
< ready = 1;
if (errno) { /* cannot get length of file */ php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length
of the file");
return bcount;
}
len = (size_t) sbuf.st_size;
/* print to the output buffer file contents /
while (bcount < len) {
if (len - bcount < buf_len) buf_len = len - bcount;
p = mmap(NULL, buf_len, PROT_READ, MAP_PRIVATE, fd, (off_t)
bcount); / try to map part of the file to memory */
if (p == (void ) MAP_FAILED) {
/ error when mapping part of the file to memory /
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot map part of the file to memory");
break;
}
PHPWRITE(p, buf_len);
munmap(p, buf_len); / try to unmap allocated memory /
if (errno) {
/ error when unmapping memory */
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot unmap allocated memory");
break;
1097a1106
bcount += buf_len;
1101,1106c1110,1114
< if(!ready) {
< int b;
<
< while ((b = php_stream_read(stream, buf, sizeof(buf))) > 0) {
< PHPWRITE(buf, b);
< bcount += b;
if (!is_mapped) { /* print to the output buffer stream contents */ while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) >
- {
PHPWRITE(buf, buf_len);
bcount += buf_len;
==================cut=================
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
On Mon, 07 Jun 2004 18:55:52 +0300
"Alexander Valyalkin" valyala@tut.by wrote:
On Mon, 07 Jun 2004 17:07:06 +0300, Andi Gutmans andi@zend.com
wrote:
Can you send a unified diff?
cvs diff -u, plz, or diff -u
WBR,
Antony Dovgal aka tony2001
tony2001@phpclub.net || antony@dovgal.com
On Mon, 7 Jun 2004 20:02:54 +0400, Antony Dovgal tony2001@phpclub.net
wrote:
On Mon, 07 Jun 2004 18:55:52 +0300
"Alexander Valyalkin" valyala@tut.by wrote:
On Mon, 07 Jun 2004 17:07:06 +0300, Andi Gutmans andi@zend.com
wrote:
Can you send a unified diff?
cvs diff -u, plz, or diff -u
WBR,
Antony Dovgal aka tony2001
tony2001@phpclub.net || antony@dovgal.com
==============cut================
--- streams.c Wed May 12 13:46:30 2004
+++ streams_new.c Tue Jun 08 12:31:22 2004
@@ -1062,48 +1062,56 @@
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC
TSRMLS_DC)
{
- size_t bcount = 0;
- int ready = 0;
- size_t bcount = 0; /* counter of printed out bytes */
- int is_mapped = 0;
char buf[8192]; - size_t buf_len = sizeof(buf);
#ifdef HAVE_MMAP
int fd;
-#endif
-#ifdef HAVE_MMAP
if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
&& stream->filterhead == NULL
&& php_stream_tell(stream) == 0
&& SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD,
(void*)&fd, 0))
{
-
is_mapped = 1;
-
buf_len = 1024 * 1024; /* default length of the mapped memory */ struct stat sbuf;
-
off_t off;
-
void *p;
-
void *p; /* pinter to the mapped part of file */ size_t len;
-
/* get the length of local file connected to descriptor fd */ fstat(fd, &sbuf);
-
if (sbuf.st_size > sizeof(buf)) {
-
off = php_stream_tell(stream);
-
len = sbuf.st_size - off;
-
p = mmap(0, len, PROT_READ, MAP_SHARED, fd, off);
-
if (p != (void *) MAP_FAILED) {
-
BG(mmap_file) = p;
-
BG(mmap_len) = len;
-
PHPWRITE(p, len);
-
BG(mmap_file) = NULL;
-
munmap(p, len);
-
bcount += len;
-
ready = 1;
-
if (errno) {
-
/* cannot get length of file */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length
of the file");
-
return bcount;
-
}
-
len = (size_t) sbuf.st_size;
-
/* print to the output buffer file contents */
-
while (bcount < len) {
-
if (len - bcount < buf_len) buf_len = len - bcount;
-
p = mmap(NULL, buf_len, PROT_READ, MAP_SHARED, fd, (off_t)
bcount); /* try to map part of the file to memory */
-
if (p == (void *) MAP_FAILED) {
-
/* error when mapping part of the file to memory */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot map part of the file to memory");
-
break;
-
}
-
PHPWRITE(p, buf_len);
-
munmap(p, buf_len); /* try to unmap allocated memory */
-
if (errno) {
-
/* error when unmapping memory */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot unmap allocated memory");
-
break; }
-
}bcount += buf_len; }
#endif
- if(!ready) {
-
int b;
-
while ((b = php_stream_read(stream, buf, sizeof(buf))) > 0) {
-
PHPWRITE(buf, b);
-
bcount += b;
- if (!is_mapped) {
-
/* print to the output buffer stream contents */
-
while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) > 0)
{
-
PHPWRITE(buf, buf_len);
-
}bcount += buf_len; }
return bcount;
==============cut================
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
/* get the length of local file connected to descriptor fd */ fstat(fd, &sbuf);
[snip]
if (errno) {
Oh-oh! You need to check the return value of fstat(). Errno is only set
if fstat() fails (returns -1); otherwise errno keeps its old (junk)
value.
Best regards,
Morten
--
Morten K. Poulsen <morten-VxKok5x5@afdelingp.dk>
On Tue, 08 Jun 2004 12:51:33 +0200, Morten K. Poulsen
morten-VxKok5x5@afdelingp.dk wrote:
/* get the length of local file connected to descriptor fd */ fstat(fd, &sbuf);
[snip]
if (errno) {
Oh-oh! You need to check the return value of fstat(). Errno is only set
if fstat() fails (returns -1); otherwise errno keeps its old (junk)
value.
Best regards,
Morten
Thanks for the useful remark. Here is the corrected code & unified diff:
=====================cut===================
/*
 * Corrected proposal for _php_stream_passthru().
 *
 * Writes the contents of `stream` to the output with PHPWRITE and returns
 * the number of bytes written.  When HAVE_MMAP is defined and the stream is
 * not a socket, has no filters, is positioned at offset 0 and can be cast
 * to a file descriptor, the file is mapped (MAP_SHARED) and written in
 * pieces of at most 1024 * 1024 bytes.  fstat() and munmap() failures are
 * detected through their return values.  Any other stream is copied
 * through the 8192-byte buffer.
 */
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC)
{
    size_t bcount = 0; /* counter of printed out bytes */
    int is_mapped = 0;
    char buf[8192];
    size_t buf_len = sizeof(buf);
#ifdef HAVE_MMAP
    int fd;

    if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
            && stream->filterhead == NULL
            && php_stream_tell(stream) == 0
            && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void*)&fd, 0))
    {
        is_mapped = 1;
        buf_len = 1024 * 1024; /* default length of the mapped memory */
        struct stat sbuf;
        void *p; /* pointer to the mapped part of file */
        size_t len;
        /* get the length of local file connected to descriptor fd */
        if (fstat(fd, &sbuf)) {
            /* cannot get length of file */
            php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length of the file");
            return bcount;
        }
        len = (size_t) sbuf.st_size;
        /* print to the output buffer file contents */
        while (bcount < len) {
            /* the last piece may be shorter than the default chunk */
            if (len - bcount < buf_len) buf_len = len - bcount;
            p = mmap(NULL, buf_len, PROT_READ, MAP_SHARED, fd, (off_t) bcount); /* try to map part of the file to memory */
            if (p == (void *) MAP_FAILED) {
                /* error when mapping part of the file to memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot map part of the file to memory");
                break;
            }
            PHPWRITE(p, buf_len);
            if (munmap(p, buf_len)) {
                /* error when unmapping memory */
                php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error: cannot unmap allocated memory");
                break;
            }
            bcount += buf_len;
        }
    }
#endif
    if (!is_mapped) {
        /* print to the output buffer stream contents */
        while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) > 0) {
            PHPWRITE(buf, buf_len);
            bcount += buf_len;
        }
    }
    return bcount;
}
=====================cut===================
unified diff
=====================cut===================
--- streams.c Wed May 12 13:46:30 2004
+++ streams_new.c Wed Jun 09 12:37:31 2004
@@ -1062,48 +1062,54 @@
PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC
TSRMLS_DC)
{
- size_t bcount = 0;
- int ready = 0;
- size_t bcount = 0; /* counter of printed out bytes */
- int is_mapped = 0;
char buf[8192]; - size_t buf_len = sizeof(buf);
#ifdef HAVE_MMAP
int fd;
-#endif
-#ifdef HAVE_MMAP
if (!php_stream_is(stream, PHP_STREAM_IS_SOCKET)
&& stream->filterhead == NULL
&& php_stream_tell(stream) == 0
&& SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD,
(void*)&fd, 0))
{
-
is_mapped = 1;
-
buf_len = 1024 * 1024; /* default length of the mapped memory */ struct stat sbuf;
-
off_t off;
-
void *p;
-
void *p; /* pinter to the mapped part of file */ size_t len;
-
fstat(fd, &sbuf);
-
if (sbuf.st_size > sizeof(buf)) {
-
off = php_stream_tell(stream);
-
len = sbuf.st_size - off;
-
p = mmap(0, len, PROT_READ, MAP_SHARED, fd, off);
-
if (p != (void *) MAP_FAILED) {
-
BG(mmap_file) = p;
-
BG(mmap_len) = len;
-
PHPWRITE(p, len);
-
BG(mmap_file) = NULL;
-
munmap(p, len);
-
bcount += len;
-
ready = 1;
-
/* get the length of local file connected to descriptor fd */
-
if (fstat(fd, &sbuf)) {
-
/* cannot get length of file */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "cannot get length
of the file");
-
return bcount;
-
}
-
len = (size_t) sbuf.st_size;
-
/* print to the output buffer file contents */
-
while (bcount < len) {
-
if (len - bcount < buf_len) buf_len = len - bcount;
-
p = mmap(NULL, buf_len, PROT_READ, MAP_SHARED, fd, (off_t)
bcount); /* try to map part of the file to memory */
-
if (p == (void *) MAP_FAILED) {
-
/* error when mapping part of the file to memory */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot map part of the file to memory");
-
break;
-
}
-
PHPWRITE(p, buf_len);
-
if (munmap(p, buf_len)) {
-
/* error when unmapping memory */
-
php_error_docref(NULL TSRMLS_CC, E_ERROR, "mmap error:
cannot unmap allocated memory");
-
break; }
-
}bcount += buf_len; }
#endif
- if(!ready) {
-
int b;
-
while ((b = php_stream_read(stream, buf, sizeof(buf))) > 0) {
-
PHPWRITE(buf, b);
-
bcount += b;
- if (!is_mapped) {
-
/* print to the output buffer stream contents */
-
while ((buf_len = php_stream_read(stream, buf, sizeof(buf))) > 0)
{
-
PHPWRITE(buf, buf_len);
-
}bcount += buf_len; }
return bcount;
=====================cut===================
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
IMO adding additional error reporting is a good idea, but we should keep
MAP_SHARED as the mmap mode. In an Apache environment, if the file is mapped by
more than one process, the memory needed to map the file is shared, rather
than each fork allocating its own copy. Unless we decide to make this a
SAPI-based option, I think we should keep it at MAP_SHARED.
Ilia
On Mon, 7 Jun 2004 10:19:09 -0400, Ilia Alshanetsky ilia@prohost.org
wrote:
IMO adding additional error reporting is a good idea, but we should keep
MAP_SHARED as the mmap mode. In an Apache environment, if the file is mapped by
more than one process, the memory needed to map the file is shared, rather
than each fork allocating its own copy. Unless we decide to make this a
SAPI-based option, I think we should keep it at MAP_SHARED.
Ilia
Thanks for the good explanation.
--
Using Opera's revolutionary e-mail client: http://www.opera.com/m2/