[This patch has the client compress files via the zlib library rather than an external program. Note that compressing files is only needed for compatibility with older CVS servers (CVS 1.8 and older). If one has a CVS 1.9 or later client and server, CVS compresses the entire data stream, which accomplishes the same thing as compressing files and more. With this patch, we could get rid of the definitions of LINES_CRLF_TERMINATED and convert_file (except in the BROKEN_READWRITE_CONVERSION case), although diffs to do so are not included. Although I've tried a big import (65M) and a checkout of the same tree (with -z and an old server, so this code gets tested, as can be verified in the CVS_CLIENT_LOG), the fact that I found two subtle bugs in the course of doing so didn't exactly provide encouragement, which is why I haven't made the patch official yet. -kingdon, Dec 1997. Note that this patch is updated from the 26 Nov 1997 one, which had the subtle bugs referred to above.] 7 Dec 1997 Jim Kingdon * zlib.c, server.h (gunzip_and_write): New function. * client.c (update_entries): Call it instead of a gunzip subprocess. * zlib.c, server.h (read_and_gzip): New function. * client.c (send_modified): Call it instead of a gzip subprocess. Index: client.c =================================================================== RCS file: /home2/cvsroot/ccvs/src/client.c,v retrieving revision 1.227 diff -c -r1.227 client.c *** client.c 1997/12/01 05:36:11 1.227 --- client.c 1997/12/07 22:01:22 *************** *** 1612,1618 **** else { int fd; - pid_t gzip_pid = 0; fd = CVS_OPEN (temp_filename, (O_WRONLY | O_CREAT | O_TRUNC --- 1612,1617 ---- *************** *** 1632,1712 **** goto discard_file_and_return; } - if (use_gzip) - fd = filter_through_gunzip (fd, 0, &gzip_pid); - if (size > 0) { read_from_server (buf, size); ! 
if (write (fd, buf, size) != size) error (1, errno, "writing %s", short_pathname); } if (close (fd) < 0) error (1, errno, "writing %s", short_pathname); - if (gzip_pid > 0) - { - int gzip_status; - - if (waitpid (gzip_pid, &gzip_status, 0) == -1) - error (1, errno, "waiting for gzip process %ld", - (long) gzip_pid); - else if (gzip_status != 0) - error (1, 0, "gzip process exited %d", gzip_status); - } - - gzip_pid = -1; } - /* Since gunzip writes files without converting LF to CRLF - (a reasonable behavior), we now have a patch file in LF - format. Leave the file as is if we're just going to feed - it to patch; patch can handle it. However, if it's the - final source file, convert it. */ - patch_failed = 0; if (data->contents == UPDATE_ENTRIES_UPDATE) { - #ifdef LINES_CRLF_TERMINATED - - /* `bin' is non-zero iff `options' contains "-kb", meaning - treat this file as binary. */ - - if (use_gzip && (! bin)) - { - convert_file (temp_filename, O_RDONLY | OPEN_BINARY, - filename, O_WRONLY | O_CREAT | O_TRUNC); - if ( CVS_UNLINK (temp_filename) < 0) - error (0, errno, "warning: couldn't delete %s", - temp_filename); - } - else - #ifdef BROKEN_READWRITE_CONVERSION - { - /* If only stdio, not open/write/etc., do text/binary - conversion, use convert_file which can compensate - (FIXME: we could just use stdio instead which would - avoid the whole problem). */ - if (!bin) - { - convert_file (temp_filename, O_RDONLY | OPEN_BINARY, - filename, O_WRONLY | O_CREAT | O_TRUNC); - if (CVS_UNLINK (temp_filename) < 0) - error (0, errno, "warning: couldn't delete %s", - temp_filename); - } - else - rename_file (temp_filename, filename); - } - #else - rename_file (temp_filename, filename); - #endif - - #else /* ! LINES_CRLF_TERMINATED */ rename_file (temp_filename, filename); - #endif /* LINES_CRLF_TERMINATED */ } else if (data->contents == UPDATE_ENTRIES_PATCH) { --- 1631,1655 ---- goto discard_file_and_return; } if (size > 0) { read_from_server (buf, size); ! if (use_gzip) ! 
gunzip_and_write (fd, short_pathname, buf, size); ! else if (write (fd, buf, size) != size) error (1, errno, "writing %s", short_pathname); } if (close (fd) < 0) error (1, errno, "writing %s", short_pathname); } patch_failed = 0; if (data->contents == UPDATE_ENTRIES_UPDATE) { rename_file (temp_filename, filename); } else if (data->contents == UPDATE_ENTRIES_PATCH) { *************** *** 4352,4456 **** if (file_gzip_level && sb.st_size > 100) { ! int nread, newsize = 0, gzip_status; ! pid_t gzip_pid; ! char *bufp = buf; ! int readsize = 8192; ! #ifdef LINES_CRLF_TERMINATED ! char *tempfile; ! int converting; ! #endif /* LINES_CRLF_TERMINATED */ ! ! #ifdef LINES_CRLF_TERMINATED ! if (vers == NULL) ! /* "Can't happen". */ ! converting = 1; ! else ! /* Otherwise, we convert things unless they're binary. */ ! converting = (! bin); ! ! if (converting) ! { ! /* gzip reads and writes files without munging CRLF ! sequences, as it should, but files should be ! transmitted in LF form. Convert CRLF to LF before ! gzipping, on systems where this is necessary. ! ! If Windows NT supported fork, we could do this by ! pushing another filter on in front of gzip. But it ! doesn't. I'd have to write a trivial little program to ! do the conversion and have CVS spawn it off. But ! little executables like that always get lost. ! ! Alternatively, this cruft could go away if we switched ! to a gzip library instead of a subprocess; then we ! could tell gzip to open the file with CRLF translation ! enabled. */ ! if (close (fd) < 0) ! error (0, errno, "warning: can't close %s", short_pathname); ! ! tempfile = cvs_temp_name (); ! convert_file (file, O_RDONLY, ! tempfile, ! O_WRONLY | O_CREAT | O_TRUNC | OPEN_BINARY); ! ! /* This OPEN_BINARY doesn't make any difference, I think, because ! gzip will deal with the inherited handle as it pleases. But I ! do remember something obscure in the manuals about propagating ! the translation mode to created processes via environment ! variables, ick. */ ! 
fd = CVS_OPEN (tempfile, O_RDONLY | OPEN_BINARY); ! if (fd < 0) ! error (1, errno, "reading %s", short_pathname); ! } ! #endif /* LINES_CRLF_TERMINATED */ ! fd = filter_through_gzip (fd, 1, file_gzip_level, &gzip_pid); - /* FIXME: is there any reason to go through all this realloc'ing - when we could just be writing the data to the network as we read - it from gzip? */ - while (1) - { - if ((bufp - buf) + readsize >= bufsize) - { - /* - * We need to expand the buffer if gzip ends up expanding - * the file. - */ - newsize = bufp - buf; - while (newsize + readsize >= bufsize) - bufsize *= 2; - buf = xrealloc (buf, bufsize); - bufp = buf + newsize; - } - nread = read (fd, bufp, readsize); - if (nread < 0) - error (1, errno, "reading from gzip pipe"); - else if (nread == 0) - /* eof */ - break; - bufp += nread; - } - newsize = bufp - buf; if (close (fd) < 0) error (0, errno, "warning: can't close %s", short_pathname); - - if (waitpid (gzip_pid, &gzip_status, 0) != gzip_pid) - error (1, errno, "waiting for gzip proc %ld", (long) gzip_pid); - else if (gzip_status != 0) - error (1, errno, "gzip exited %d", gzip_status); - - #if LINES_CRLF_TERMINATED - if (converting) - { - if ( CVS_UNLINK (tempfile) < 0) - error (0, errno, - "warning: can't remove temp file %s", tempfile); - free (tempfile); - tempfile = NULL; - } - #endif /* LINES_CRLF_TERMINATED */ { char tmp[80]; --- 4295,4308 ---- if (file_gzip_level && sb.st_size > 100) { ! int newsize = 0; ! read_and_gzip (fd, short_pathname, (unsigned char **)&buf, ! &bufsize, &newsize, ! 
file_gzip_level); if (close (fd) < 0) error (0, errno, "warning: can't close %s", short_pathname); { char tmp[80]; Index: server.h =================================================================== RCS file: /home2/cvsroot/ccvs/src/server.h,v retrieving revision 1.16 diff -c -r1.16 server.h *** server.h 1997/07/25 12:44:59 1.16 --- server.h 1997/12/07 22:01:23 *************** *** 162,164 **** --- 162,169 ---- /* Table of requests ending with an entry with a NULL name. */ extern struct request requests[]; + + /* Gzip library, see zlib.c. */ + extern void gunzip_and_write PROTO ((int, char *, unsigned char *, size_t)); + extern void read_and_gzip PROTO ((int, char *, unsigned char **, size_t *, + size_t *, int)); Index: zlib.c =================================================================== RCS file: /home2/cvsroot/ccvs/src/zlib.c,v retrieving revision 1.4 diff -c -r1.4 zlib.c *** zlib.c 1997/04/16 15:55:04 1.4 --- zlib.c 1997/12/07 23:23:03 *************** *** 426,429 **** --- 426,622 ---- return buf_shutdown (cb->buf); } + + + /* Here is our librarified gzip implementation. It is very minimal + but attempts to be RFC1952 compliant. */ + /* Note that currently only the client uses the gzip library. If we + make the server use it too (which should be straightforward), then + filter_stream_through_program, filter_through_gzip, and + filter_through_gunzip can go away. */ + + /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951). + We are to uncompress the data and write the result to the file + descriptor FD. If something goes wrong, give an error message + mentioning FULLNAME as the name of the file for FD (and make it a + fatal error if we can't recover from it). 
*/
+ 
+ /* Layout notes for gunzip_and_write: a gzip member (RFC1952) is a 10
+    byte fixed header, then optional fields selected by the FLG byte
+    (buf[3]), then the raw deflate data, then an 8 byte trailer holding
+    the CRC-32 and the uncompressed length, both little-endian.  The
+    trailer therefore starts at (header length + bytes consumed by
+    inflate), which is only 10 + total_in when no optional header field
+    is present.  */
+ 
+ void
+ gunzip_and_write (fd, fullname, buf, size)
+      int fd;
+      char *fullname;
+      unsigned char *buf;
+      size_t size;
+ {
+     size_t pos;
+     size_t trailer;
+     unsigned int flag;
+     z_stream zstr;
+     int zstatus;
+     unsigned char outbuf[32768];
+     unsigned long crc;
+     unsigned long stored;
+ 
+     /* Fixed header (10) plus trailer (8) is the least a gzip member can
+        occupy; checking first keeps the header reads below inside BUF.  */
+     if (size < 18)
+ 	error (1, 0, "gzipped data for %s is too short", fullname);
+ 
+     if (buf[0] != 31 || buf[1] != 139)
+ 	error (1, 0, "gzipped data does not start with gzip identification");
+     if (buf[2] != 8)
+ 	error (1, 0, "only the deflate compression method is supported");
+ 
+     /* Skip over the fixed header, and then skip any of the variable-length
+        fields.  */
+     pos = 10;
+     if (buf[3] & 4)
+ 	/* FEXTRA: two byte little-endian length, then that many bytes.  */
+ 	pos += buf[pos] + (buf[pos + 1] << 8) + 2;
+     /* FNAME (8) and FCOMMENT (16) are NUL-terminated strings.  Scan with
+        an explicit bound rather than strlen so that a missing NUL cannot
+        carry us past the end of BUF.  */
+     for (flag = 8; flag <= 16; flag <<= 1)
+ 	if (buf[3] & flag)
+ 	{
+ 	    while (pos < size && buf[pos] != '\0')
+ 		++pos;
+ 	    /* Step over the terminating NUL.  */
+ 	    ++pos;
+ 	}
+     if (buf[3] & 2)
+ 	/* FHCRC: two byte header checksum, which we do not verify.  */
+ 	pos += 2;
+ 
+     /* The deflate data and the trailer must still fit after the header.  */
+     if (pos > size - 8)
+ 	error (1, 0, "gzip header for %s is truncated", fullname);
+ 
+     memset (&zstr, 0, sizeof zstr);
+     /* Passing a negative argument tells zlib not to look for a zlib
+        (RFC1950) header.  This is an undocumented feature; I suppose if
+        we wanted to be anal we could synthesize a header instead,
+        but why bother?  */
+     zstatus = inflateInit2 (&zstr, -15);
+ 
+     if (zstatus != Z_OK)
+ 	compress_error (1, zstatus, &zstr, fullname);
+ 
+     /* I don't see why we should have to include the 8 byte trailer in
+        avail_in.  But I see that zlib/gzio.c does, and it seemed to fix
+        a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
+        reason.  */
+     zstr.avail_in = size - pos;
+     zstr.next_in = buf + pos;
+ 
+     crc = crc32 (0, NULL, 0);
+ 
+     do
+     {
+ 	unsigned char *p;
+ 	size_t produced;
+ 	int nwritten;
+ 
+ 	zstr.avail_out = sizeof (outbuf);
+ 	zstr.next_out = outbuf;
+ 	zstatus = inflate (&zstr, Z_NO_FLUSH);
+ 	if (zstatus != Z_STREAM_END && zstatus != Z_OK)
+ 	    compress_error (1, zstatus, &zstr, fullname);
+ 
+ 	produced = sizeof (outbuf) - zstr.avail_out;
+ 	crc = crc32 (crc, outbuf, produced);
+ 
+ 	/* write may accept fewer bytes than requested; keep going until
+ 	   the whole chunk is out so that no data is silently dropped.  */
+ 	for (p = outbuf; produced > 0; p += nwritten, produced -= nwritten)
+ 	{
+ 	    nwritten = write (fd, p, produced);
+ 	    if (nwritten <= 0)
+ 		error (1, nwritten < 0 ? errno : 0,
+ 		       "writing decompressed file %s", fullname);
+ 	}
+     } while (zstatus != Z_STREAM_END);
+     zstatus = inflateEnd (&zstr);
+     if (zstatus != Z_OK)
+ 	compress_error (0, zstatus, &zstr, fullname);
+ 
+     /* total_in counts from buf + pos, so that is where the trailer
+        offset has to be measured from.  */
+     trailer = pos + zstr.total_in;
+     if (trailer > size - 8)
+ 	error (1, 0, "gzip trailer missing uncompressing %s", fullname);
+ 
+     /* Assemble the little-endian fields in unsigned long so that a high
+        byte of 0x80 or more neither overflows int nor sign-extends.  */
+     stored = ((unsigned long) buf[trailer]
+ 	      | ((unsigned long) buf[trailer + 1] << 8)
+ 	      | ((unsigned long) buf[trailer + 2] << 16)
+ 	      | ((unsigned long) buf[trailer + 3] << 24));
+     if (crc != stored)
+ 	error (1, 0, "CRC error uncompressing %s", fullname);
+ 
+     stored = ((unsigned long) buf[trailer + 4]
+ 	      | ((unsigned long) buf[trailer + 5] << 8)
+ 	      | ((unsigned long) buf[trailer + 6] << 16)
+ 	      | ((unsigned long) buf[trailer + 7] << 24));
+     /* The stored length is the uncompressed size modulo 2^32.  */
+     if ((zstr.total_out & 0xffffffffUL) != stored)
+ 	error (1, 0, "invalid length uncompressing %s", fullname);
+ }
+ 
+ /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
+    replacing previous contents of *BUF.  *BUF is malloc'd and *SIZE is
+    its allocated size.  Put the actual number of bytes of data in
+    *LEN.  If something goes wrong, give an error message mentioning
+    FULLNAME as the name of the file for FD (and make it a fatal error
+    if we can't recover from it).  LEVEL is the compression level (1-9).
*/
+ 
+ /* Implementation notes for read_and_gzip: the output is a 10 byte
+    gzip header, the raw deflate stream, and an 8 byte trailer (CRC-32
+    of the input, then the input length, both little-endian), so *LEN
+    ends up as the deflate size plus 18.  *BUF is grown with xrealloc
+    as needed and *SIZE is updated to match.  */
+ 
+ void
+ read_and_gzip (fd, fullname, buf, size, len, level)
+      int fd;
+      char *fullname;
+      unsigned char **buf;
+      size_t *size;
+      size_t *len;
+      int level;
+ {
+     z_stream zstr;
+     int zstatus;
+     unsigned char inbuf[8192];
+     int nread;
+     unsigned long crc;
+     size_t trailer;
+ 
+     if (*size < 1024)
+     {
+ 	*size = 1024;
+ 	*buf = (unsigned char *) xrealloc (*buf, *size);
+     }
+     /* ID1, ID2: the gzip magic number.  */
+     (*buf)[0] = 31;
+     (*buf)[1] = 139;
+     /* CM: 8 means deflate.  */
+     (*buf)[2] = 8;
+     /* FLG: no optional header fields.  */
+     (*buf)[3] = 0;
+     /* MTIME: zero, i.e. no timestamp recorded.  */
+     (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
+     /* Could set this based on level, but why bother?  */
+     (*buf)[8] = 0;
+     /* OS: 255 means unknown.  */
+     (*buf)[9] = 255;
+ 
+     memset (&zstr, 0, sizeof zstr);
+     zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
+ 			    Z_DEFAULT_STRATEGY);
+     crc = crc32 (0, NULL, 0);
+     if (zstatus != Z_OK)
+ 	compress_error (1, zstatus, &zstr, fullname);
+     /* The header already occupies the first 10 bytes, so only the rest
+        of the buffer is available to deflate.  */
+     zstr.avail_out = *size - 10;
+     zstr.next_out = *buf + 10;
+ 
+     while (1)
+     {
+ 	/* Must start out false on every pass; it is only set at EOF.  */
+ 	int finish = 0;
+ 
+ 	nread = read (fd, inbuf, sizeof inbuf);
+ 	if (nread < 0)
+ 	    error (1, errno, "cannot read %s", fullname);
+ 	else if (nread == 0)
+ 	    /* End of file.  */
+ 	    finish = 1;
+ 	crc = crc32 (crc, inbuf, nread);
+ 	zstr.next_in = inbuf;
+ 	zstr.avail_in = nread;
+ 
+ 	do
+ 	{
+ 	    size_t offset;
+ 
+ 	    /* I don't see this documented anywhere, but deflate seems
+ 	       to tend to dump core sometimes if we pass it Z_FINISH and
+ 	       a small (e.g. 2147 byte) avail_out.  So we insist on at
+ 	       least 4096 bytes (that is what zlib/gzio.c uses).  Loop,
+ 	       since doubling a small buffer once may not get there.  */
+ 
+ 	    while (zstr.avail_out < 4096)
+ 	    {
+ 		offset = zstr.next_out - *buf;
+ 		*size *= 2;
+ 		*buf = (unsigned char *) xrealloc (*buf, *size);
+ 		zstr.next_out = *buf + offset;
+ 		zstr.avail_out = *size - offset;
+ 	    }
+ 
+ 	    zstatus = deflate (&zstr, finish ? Z_FINISH : Z_NO_FLUSH);
+ 	    if (zstatus == Z_STREAM_END)
+ 		goto done;
+ 	    else if (zstatus != Z_OK)
+ 		compress_error (0, zstatus, &zstr, fullname);
+ 	    /* A full output buffer means deflate may have more to hand
+ 	       over for this input; go round again after growing it.  */
+ 	} while (zstr.avail_out == 0);
+     }
+  done:
+     /* deflate may have used the buffer right up to the end, so make
+        sure the 8 trailer bytes fit before storing them.  */
+     trailer = zstr.total_out + 10;
+     if (trailer + 8 > *size)
+     {
+ 	*size = trailer + 8;
+ 	*buf = (unsigned char *) xrealloc (*buf, *size);
+     }
+ 
+     /* CRC-32 of the uncompressed data, least significant byte first.  */
+     (*buf)[trailer] = crc & 0xff;
+     (*buf)[trailer + 1] = (crc >> 8) & 0xff;
+     (*buf)[trailer + 2] = (crc >> 16) & 0xff;
+     (*buf)[trailer + 3] = (crc >> 24) & 0xff;
+ 
+     /* Uncompressed length (low 32 bits), least significant byte first.  */
+     (*buf)[trailer + 4] = zstr.total_in & 0xff;
+     (*buf)[trailer + 5] = (zstr.total_in >> 8) & 0xff;
+     (*buf)[trailer + 6] = (zstr.total_in >> 16) & 0xff;
+     (*buf)[trailer + 7] = (zstr.total_in >> 24) & 0xff;
+ 
+     *len = trailer + 8;
+ 
+     zstatus = deflateEnd (&zstr);
+     if (zstatus != Z_OK)
+ 	compress_error (0, zstatus, &zstr, fullname);
+ }
  #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */