NOTE: line breaks and leading indentation of this patch were reconstructed from a
whitespace-collapsed copy and cross-checked against the hunk line counts. If a hunk
fails to match its target, retry with `git apply --ignore-whitespace`.

diff --git a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
index 88d08386e8c..8586dc7f63b 100644
--- a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
+++ b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
@@ -58,6 +58,19 @@ import java.util.Objects;
  * The {@link #close} method should be called to release resources used by this
  * stream, either directly, or with the {@code try}-with-resources statement.
  *
+ * @implNote
+ * After reading a member trailer, the {@linkplain #read(byte[], int, int) read} method calls
+ * {@link InputStream#available()} on the underlying stream to determine whether additional
+ * bytes are available that may represent a subsequent member. If the
+ * {@systemProperty jdk.util.gzip.tryReadAheadAfterTrailer} system property is set
+ * to {@code true}, then the call to {@code InputStream.available()} is skipped and the
+ * implementation instead attempts to read a subsequent member in the stream.
+ * {@code GZIPInputStream} depends on the return value of {@code InputStream.available()}
+ * to reliably process a stream with a series of members. Consequently, it may be necessary
+ * to set this property in environments that process streams with a series of members. By default,
+ * the {@code jdk.util.gzip.tryReadAheadAfterTrailer} system property is not set, and
+ * {@code InputStream.available()} gets called.
+ *
  * @spec https://www.rfc-editor.org/info/rfc1952
  *      RFC 1952: GZIP file format specification version 4.3
  *
@@ -66,6 +79,12 @@ import java.util.Objects;
  * @since 1.1
  */
 public class GZIPInputStream extends InflaterInputStream {
+
+    // system property which configures whether we skip the call to InputStream.available()
+    // when checking for additional GZIP members in a stream
+    private static final boolean alwaysReadNextMember =
+            Boolean.getBoolean("jdk.util.gzip.tryReadAheadAfterTrailer");
+
     /**
      * GZIP header magic number.
      */
@@ -119,7 +138,11 @@ public class GZIPInputStream extends InflaterInputStream {
         super(in, createInflater(in, size), size);
         usesDefaultInflater = true;
         try {
-            readHeader(in);
+            // we don't expect the stream to be at EOF
+            // and if it is, then we want readHeader to
+            // raise an exception, so we pass "true" for
+            // the "failOnEOF" param.
+            readHeader(in, true);
         } catch (IOException ioe) {
             this.inf.end();
             throw ioe;
@@ -194,10 +217,15 @@ public class GZIPInputStream extends InflaterInputStream {
         }
         int n = super.read(buf, off, len);
         if (n == -1) {
-            if (readTrailer())
+            if (hasNoMoreMembers()) {
                 eos = true;
-            else
+            } else {
+                // When a next member is available, hasNoMoreMembers() will read
+                // its header and will position the stream at the next member's
+                // deflated data. We now decompress and return that member's
+                // decompressed data.
                 return this.read(buf, off, len);
+            }
         } else {
             crc.update(buf, off, n);
         }
@@ -221,12 +249,40 @@ public class GZIPInputStream extends InflaterInputStream {
     /*
      * Reads GZIP member header and returns the total byte number
      * of this member header.
+     * If failOnEOF is false and if the given InputStream has already
+     * reached EOF when this method was invoked, then this method returns
+     * -1 (indicating that there's no GZIP member header).
+     * In all other cases of malformed header or EOF being detected
+     * when reading the header, this method will throw an IOException.
      */
-    private int readHeader(InputStream this_in) throws IOException {
-        CheckedInputStream in = new CheckedInputStream(this_in, crc);
+    private int readHeader(InputStream stream, boolean failOnEOF) throws IOException {
+        CheckedInputStream in = new CheckedInputStream(stream, crc);
         crc.reset();
+
+        int magic;
+        if (!failOnEOF) {
+            // read an unsigned short value representing the GZIP magic header.
+            // this is the same as calling readUShort(in), except that here,
+            // when reading the first byte, we don't raise an EOFException
+            // if the stream has already reached EOF.
+
+            // read unsigned byte
+            int b = in.read();
+            if (b == -1) { // EOF
+                crc.reset();
+                return -1; // represents no header bytes available
+            }
+            checkUnexpectedByte(b);
+            // read the next unsigned byte to form the unsigned
+            // short. we throw the usual EOFException/ZipException
+            // from this point on if there is no more data or
+            // the data doesn't represent a header.
+            magic = (readUByte(in) << 8) | b;
+        } else {
+            magic = readUShort(in);
+        }
         // Check header magic
-        if (readUShort(in) != GZIP_MAGIC) {
+        if (magic != GZIP_MAGIC) {
             throw new ZipException("Not in GZIP format");
         }
         // Check compression method
@@ -268,44 +324,66 @@ public class GZIPInputStream extends InflaterInputStream {
         return n;
     }
 
-    /*
-     * Reads GZIP member trailer and returns true if the eos
-     * reached, false if there are more (concatenated gzip
-     * data set)
+    /**
+     * Reads the current GZIP member's trailer and returns true if the end-of-stream is
+     * reached. After reading the current member's trailer, if the stream has a subsequent
+     * GZIP member, then this method reads that member's header and returns false indicating
+     * that there is another member in the stream.
      */
-    private boolean readTrailer() throws IOException {
-        InputStream in = this.in;
-        int n = inf.getRemaining();
-        if (n > 0) {
-            in = new SequenceInputStream(
-                        new ByteArrayInputStream(buf, len - n, n),
-                        new FilterInputStream(in) {
-                            public void close() throws IOException {}
-                        });
+    private boolean hasNoMoreMembers() throws IOException {
+        final int numRemainingInInflater = inf.getRemaining();
+        InputStream stream = this.in;
+        if (numRemainingInInflater > 0) {
+            stream = new SequenceInputStream(
+                    new ByteArrayInputStream(buf, len - numRemainingInInflater, numRemainingInInflater),
+                    new FilterInputStream(stream) {
+                        public void close() {}
+                    });
         }
-        // Uses left-to-right evaluation order
-        if ((readUInt(in) != crc.getValue()) ||
-            // rfc1952; ISIZE is the input size modulo 2^32
-            (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL)))
-            throw new ZipException("Corrupt GZIP trailer");
-
-        // If there are more bytes available in "in" or
-        // the leftover in the "inf" is > 26 bytes:
-        // this.trailer(8) + next.header.min(10) + next.trailer(8)
-        // try concatenated case
-        if (this.in.available() > 0 || n > 26) {
-            int m = 8; // this.trailer
-            try {
-                m += readHeader(in); // next.header
-            } catch (IOException ze) {
-                return true; // ignore any malformed, do nothing
+        // first read the current member's trailer
+        readTrailer(stream);
+        // decide whether to read next member's header
+        final boolean readNextMember = alwaysReadNextMember
+                || this.in.available() > 0
+                || numRemainingInInflater > 26; // current member's trailer == 8 bytes
+                                                // + minimum of 10 bytes header for next member
+                                                // + mandatory 8 bytes from next member's trailer
+                                                // == at least 26 bytes needed for next member to
+                                                // be present
+        if (!readNextMember) {
+            return true; // no need to read next member
+        }
+        // read next member's header
+        int m = 8; // this.trailer
+        try {
+            int numNextHeaderBytes = readHeader(stream, false); // next.header (if available)
+            if (numNextHeaderBytes == -1) {
+                return true; // end of stream reached, no more members
             }
-            inf.reset();
-            if (n > m)
-                inf.setInput(buf, len - n + m, n - m);
-            return false;
+            m += numNextHeaderBytes;
+        } catch (IOException ze) {
+            return true; // ignore any malformed, consider it as no more members in the stream
+        }
+        inf.reset(); // reset the inflater for fresh input data from the next member
+        if (numRemainingInInflater > m) {
+            // position the inflater's input buffer to the start of next member's deflated data
+            inf.setInput(buf, len - numRemainingInInflater + m, numRemainingInInflater - m);
+        }
+        return false; // next member exists
+    }
+
+    /**
+     * Reads the current member's trailer
+     *
+     * @param stream the InputStream containing the trailer
+     */
+    private void readTrailer(final InputStream stream) throws IOException {
+        // Uses left-to-right evaluation order
+        if ((readUInt(stream) != crc.getValue()) ||
+                // rfc1952; ISIZE is the input size modulo 2^32
+                (readUInt(stream) != (inf.getBytesWritten() & 0xffffffffL))) {
+            throw new ZipException("Corrupt GZIP trailer");
         }
-        return true;
     }
 
     /*
@@ -332,14 +410,18 @@ public class GZIPInputStream extends InflaterInputStream {
         if (b == -1) {
             throw new EOFException();
         }
-        if (b < -1 || b > 255) {
-            // Report on this.in, not argument in; see read{Header, Trailer}.
-            throw new IOException(this.in.getClass().getName()
-                + ".read() returned value out of range -1..255: " + b);
-        }
+        checkUnexpectedByte(b);
         return b;
     }
 
+    private void checkUnexpectedByte(final int b) throws IOException {
+        if (b < -1 || b > 255) {
+            // report the InputStream type which returned this unexpected byte
+            throw new IOException(this.in.getClass().getName()
+                    + ".read() returned value out of range -1..255: " + b);
+        }
+    }
+
     /*
      * Skips bytes of input data blocking until all bytes are skipped.
      * Does not assume that the input stream is capable of seeking.
diff --git a/test/jdk/java/util/zip/GZIP/GZIPInputStreamCallsAvailable.java b/test/jdk/java/util/zip/GZIP/GZIPInputStreamCallsAvailable.java
new file mode 100644
index 00000000000..e39b47dfc8e
--- /dev/null
+++ b/test/jdk/java/util/zip/GZIP/GZIPInputStreamCallsAvailable.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.Random;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import jdk.test.lib.RandomFactory;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+/*
+ * @test
+ * @summary Verify the behaviour of GZIPInputStream when dealing with InputStream.available()
+ *          on the underlying stream and the jdk.util.gzip.tryReadAheadAfterTrailer
+ *          system property being enabled/disabled
+ * @key randomness
+ * @library /test/lib
+ * @build jdk.test.lib.RandomFactory
+ * @run junit/othervm -Djdk.util.gzip.tryReadAheadAfterTrailer=true GZIPInputStreamCallsAvailable
+ * @run junit/othervm -Djdk.util.gzip.tryReadAheadAfterTrailer=false GZIPInputStreamCallsAvailable
+ * @run junit GZIPInputStreamCallsAvailable
+ */
+class GZIPInputStreamCallsAvailable {
+
+    private static final boolean AVAILABLE_METHOD_INVOCATION_SKIPPED =
+            Boolean.getBoolean("jdk.util.gzip.tryReadAheadAfterTrailer");
+    private static final Random random = RandomFactory.getRandom();
+
+    private record TestData(byte[] uncompressed, byte[] compressed) {
+    }
+
+    static List<Integer> numGZIPMembers() {
+        return List.of(1,
+                33,
+                random.nextInt(2, 1001) // a reasonably large number of members
+        );
+    }
+
+    /*
+     * Verify that GZIPInputStream reads and returns the correct decompressed data when:
+     * - the underlying InputStream.available() returns an accurate value
+     * - and when the GZIPInputStream isn't expected to call the underlying InputStream.available()
+     *   method
+     */
+    @ParameterizedTest
+    @MethodSource("numGZIPMembers")
+    void testMultipleMembers(final int numMembers) throws IOException {
+        final TestData testData = createGZIPStream(numMembers);
+        final InputStream underlyingStream = AVAILABLE_METHOD_INVOCATION_SKIPPED
+                // stream whose available() method isn't expected to be invoked
+                ? new AlwaysThrowFromAvailable(new ByteArrayInputStream(testData.compressed))
+                // stream whose available() will be invoked and returns an accurate value
+                : new ByteArrayInputStream(testData.compressed);
+        try (GZIPInputStream gzip = new GZIPInputStream(underlyingStream)) {
+            final byte[] decompressed = gzip.readAllBytes();
+            assertArrayEquals(testData.uncompressed, decompressed, "unexpected decompressed data");
+        }
+    }
+
+    /*
+     * Creates and returns bytes representing a GZIP stream consisting of the given number of
+     * members.
+     */
+    private static TestData createGZIPStream(final int numMembers) throws IOException {
+        final String content = "foo bar hello world from " + GZIPInputStreamCallsAvailable.class;
+        final ByteArrayOutputStream uncompressed = new ByteArrayOutputStream();
+        final ByteArrayOutputStream gzipped = new ByteArrayOutputStream();
+        for (int i = 1; i <= numMembers; i++) {
+            final ByteArrayOutputStream member = new ByteArrayOutputStream();
+            try (final OutputStream gzip = new GZIPOutputStream(member)) {
+                final byte[] memberRawBytes = ("member-" + i + " " + content).getBytes(US_ASCII);
+                gzip.write(memberRawBytes);
+                // keep track of the uncompressed content too so that it can be compared for
+                // equality with the decompressed content
+                uncompressed.write(memberRawBytes);
+            }
+            // write out the GZIP member to the stream which accumulates all the members
+            gzipped.write(member.toByteArray());
+        }
+        return new TestData(uncompressed.toByteArray(), gzipped.toByteArray());
+    }
+
+    private static class AlwaysThrowFromAvailable extends FilterInputStream {
+        public AlwaysThrowFromAvailable(InputStream in) {
+            super(in);
+        }
+
+        @Override
+        public int available() {
+            throw new AssertionError(this.getClass().getName()
+                    + ".available() wasn't expected to be invoked");
+        }
+    }
+}
diff --git a/test/jdk/java/util/zip/GZIP/GZIPOverBlockingStreams.java b/test/jdk/java/util/zip/GZIP/GZIPOverBlockingStreams.java
index 81f55f2f0dd..b6dea98c28d 100644
--- a/test/jdk/java/util/zip/GZIP/GZIPOverBlockingStreams.java
+++ b/test/jdk/java/util/zip/GZIP/GZIPOverBlockingStreams.java
@@ -63,6 +63,9 @@ import static org.junit.jupiter.api.Assertions.fail;
  * @library /test/lib
  * @build jdk.test.lib.net.URIBuilder jdk.test.lib.RandomFactory
  * @run junit GZIPOverBlockingStreams
+ * @comment verify it behaves the same when jdk.util.gzip.tryReadAheadAfterTrailer system property
+ *          is set to false
+ * @run junit/othervm -Djdk.util.gzip.tryReadAheadAfterTrailer=false GZIPOverBlockingStreams
  */
 class GZIPOverBlockingStreams {
 