8322256: Define and document GZIPInputStream concatenated stream semantics

Reviewed-by: lancea, alanb, simonis
This commit is contained in:
Jaikiran Pai 2026-06-02 04:37:09 +00:00
parent f640edebf0
commit 207dc4e82f
3 changed files with 272 additions and 95 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,17 +34,55 @@ import java.io.EOFException;
import java.util.Objects;
/**
* This class implements a stream filter for reading compressed data in
* the GZIP file format.
* This class implements a stream filter for decompressing GZIP file format data.
*
* <h2><a id="gzip_file_format">GZIP file format</a></h2>
* The GZIP file format is specified by RFC 1952. The format, as specified in section 2.2 of
* the RFC, consists of a series of "members" that appear one after another in the stream with
* no additional information before, between, or after them. Each member consists of a header,
* followed by data that is compressed using the {@code deflate} algorithm, and then a trailer.
* <p>
* This class is capable of reading a stream consisting of a series of members.
* <p>
* Reading from the stream may read and buffer bytes from the underlying stream.
* This includes bytes that follow a member's trailer. Whether or not any additional bytes
* have been read past a member's trailer, the read methods on this class yield decompressed
* data from at most one member; data from multiple members is not combined in
* a single read operation.
*
* <h2><a id="thread_safety">Thread safety</a></h2>
* {@code GZIPInputStream} is not safe for use by multiple concurrent threads. Any multithreaded
* concurrent use must be guarded by appropriate synchronization.
*
* @apiNote
* The {@link #close} method should be called to release resources used by this
* stream, either directly, or with the {@code try}-with-resources statement.
*
* @spec https://www.rfc-editor.org/info/rfc1952
* RFC 1952: GZIP file format specification version 4.3
*
* @see InflaterInputStream
*
* @see InflaterInputStream
* @author David Connelly
* @since 1.1
*
*/
public class GZIPInputStream extends InflaterInputStream {
/**
* CRC-32 for uncompressed data.
* GZIP header magic number.
*/
public static final int GZIP_MAGIC = 0x8b1f;
/*
* File header flags.
*/
private static final int FHCRC = 2; // Header CRC
private static final int FEXTRA = 4; // Extra field
private static final int FNAME = 8; // File name
private static final int FCOMMENT = 16; // File comment
private final byte[] tmpbuf = new byte[128];
/**
* CRC-32 for decompressed data.
*/
protected CRC32 crc = new CRC32();
@ -66,13 +104,15 @@ public class GZIPInputStream extends InflaterInputStream {
/**
* Creates a new input stream with the specified buffer size.
*
* @param in the input stream
* @param size the input buffer size
*
* @throws ZipException if a GZIP format error has occurred or the
* compression method used is unsupported
* @throws NullPointerException if {@code in} is null
* @throws IOException if an I/O error has occurred
* @throws IOException if an I/O error occurs when reading the member header
* from the underlying stream
* @throws IllegalArgumentException if {@code size <= 0}
*/
public GZIPInputStream(InputStream in, int size) throws IOException {
@ -103,25 +143,27 @@ public class GZIPInputStream extends InflaterInputStream {
/**
* Creates a new input stream with a default buffer size.
*
* @param in the input stream
*
* @throws ZipException if a GZIP format error has occurred or the
* compression method used is unsupported
* @throws NullPointerException if {@code in} is null
* @throws IOException if an I/O error has occurred
* @throws IOException if an I/O error occurs when reading the member header
* from the underlying stream
*/
public GZIPInputStream(InputStream in) throws IOException {
this(in, 512);
}
/**
* Reads uncompressed data into an array of bytes, returning the number of inflated
* Reads decompressed data into an array of bytes, returning the number of decompressed
* bytes. If {@code len} is not zero, the method will block until some input can be
* decompressed; otherwise, no bytes are read and {@code 0} is returned.
* <p>
* If this method returns a nonzero integer <i>n</i> then {@code buf[off]}
* through {@code buf[off+}<i>n</i>{@code -1]} contain the uncompressed
* data. The content of elements {@code buf[off+}<i>n</i>{@code ]} through
* through {@code buf[off+}<i>n</i>{@code -1]} contain the decompressed
* data. The content of elements {@code buf[off+}<i>n</i>{@code ]} through
* {@code buf[off+}<i>len</i>{@code -1]} is undefined, contrary to the
* specification of the {@link java.io.InputStream InputStream} superclass,
* so an implementation is free to modify these elements during the inflate
@ -131,18 +173,20 @@ public class GZIPInputStream extends InflaterInputStream {
*
* @param buf the buffer into which the data is read
* @param off the start offset in the destination array {@code buf}
* @param len the maximum number of bytes read
* @return the actual number of bytes inflated, or -1 if the end of the
* compressed input stream is reached
* @param len the maximum number of bytes to read into {@code buf}
* @return the actual number of bytes decompressed from a GZIP member, or -1 if the
* end-of-stream is reached
*
* @throws NullPointerException If {@code buf} is {@code null}.
* @throws IndexOutOfBoundsException If {@code off} is negative,
* {@code len} is negative, or {@code len} is greater than
* {@code buf.length - off}
* @throws ZipException if the compressed input data is corrupt.
* @throws IOException if an I/O error has occurred.
* @throws IOException if the stream is closed or an I/O error has occurred.
*
* @see ##gzip_file_format GZIP file format
*/
@Override
public int read(byte[] buf, int off, int len) throws IOException {
ensureOpen();
if (eos) {
@ -165,6 +209,7 @@ public class GZIPInputStream extends InflaterInputStream {
* with the stream.
* @throws IOException if an I/O error has occurred
*/
@Override
public void close() throws IOException {
if (!closed) {
super.close();
@ -173,20 +218,6 @@ public class GZIPInputStream extends InflaterInputStream {
}
}
/**
* GZIP header magic number.
*/
public static final int GZIP_MAGIC = 0x8b1f;
/*
* File header flags.
*/
private static final int FTEXT = 1; // Extra text
private static final int FHCRC = 2; // Header CRC
private static final int FEXTRA = 4; // Extra field
private static final int FNAME = 8; // File name
private static final int FCOMMENT = 16; // File comment
/*
* Reads GZIP member header and returns the total byte number
* of this member header.
@ -309,8 +340,6 @@ public class GZIPInputStream extends InflaterInputStream {
return b;
}
private byte[] tmpbuf = new byte[128];
/*
* Skips bytes of input data blocking until all bytes are skipped.
* Does not assume that the input stream is capable of seeking.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -22,15 +22,21 @@
*/
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.function.Executable;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
/*
* @test
@ -51,7 +57,7 @@ public class BasicGZIPInputStreamTest {
@ParameterizedTest
@MethodSource("npeFromConstructors")
public void testNPEFromConstructors(final Executable constructor) {
Assertions.assertThrows(NullPointerException.class, constructor,
assertThrows(NullPointerException.class, constructor,
"GZIPInputStream constructor did not throw NullPointerException");
}
@ -71,7 +77,7 @@ public class BasicGZIPInputStreamTest {
@ParameterizedTest
@MethodSource("iaeFromConstructors")
public void testIAEFromConstructors(final Executable constructor) {
Assertions.assertThrows(IllegalArgumentException.class, constructor,
assertThrows(IllegalArgumentException.class, constructor,
"GZIPInputStream constructor did not throw IllegalArgumentException");
}
@ -89,7 +95,29 @@ public class BasicGZIPInputStreamTest {
@ParameterizedTest
@MethodSource("ioeFromConstructors")
public void testIOEFromConstructors(final Executable constructor) {
Assertions.assertThrows(IOException.class, constructor,
assertThrows(IOException.class, constructor,
"GZIPInputStream constructor did not throw IOException");
}
/*
 * Checks that reading from a GZIPInputStream that has already been closed
 * fails with an IOException whose message contains "Stream closed".
 */
@Test
void testClosedStreamRead() throws Exception {
    // produce a small, valid GZIP member to construct the stream from
    final ByteArrayOutputStream compressedSink = new ByteArrayOutputStream();
    try (GZIPOutputStream gzipOut = new GZIPOutputStream(compressedSink)) {
        gzipOut.write(new byte[] {0x42, 0x42});
    }

    // open the stream under test and close it right away
    final GZIPInputStream closedStream =
            new GZIPInputStream(new ByteArrayInputStream(compressedSink.toByteArray()));
    closedStream.close();

    final byte[] target = new byte[1];
    final IOException thrown = assertThrows(IOException.class,
            () -> closedStream.read(target, 0, 1));

    final String message = thrown.getMessage();
    final boolean hasExpectedMessage = message != null && message.contains("Stream closed");
    if (!hasExpectedMessage) {
        // not the failure we were looking for, surface the original exception
        throw thrown;
    }
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -21,81 +21,201 @@
* questions.
*/
/* @test
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import jdk.test.lib.RandomFactory;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/*
* @test
* @bug 4691425
* @summary Test the read and write of GZIPInput/OutputStream, including
* concatenated .gz inputstream
* @key randomness
* @library /test/lib
* @build jdk.test.lib.RandomFactory
* @run junit ${test.main.class}
*/
class GZIPInputStreamRead {
import java.io.*;
import java.util.*;
import java.util.zip.*;
private static final Random random = RandomFactory.getRandom();
public class GZIPInputStreamRead {
public static void main(String[] args) throws Throwable {
Random rnd = new Random();
for (int i = 1; i < 100; i++) {
int members = rnd.nextInt(10) + 1;
/*
* Generates GZIP content containing multiple members and then verifies
* that using GZIPInputStream to decompress that content generates the correct
* expected decompressed data.
*/
@Test
void testMultipleMembers() throws Exception {
final int numMembers = random.nextInt(10) + 1;
final ByteArrayOutputStream rawUncompressedBaos = new ByteArrayOutputStream();
final ByteArrayOutputStream gzipCompressedBaos = new ByteArrayOutputStream();
// generate GZIP content with multiple members
for (int j = 0; j < numMembers; j++) {
byte[] src = new byte[random.nextInt(8192) + 1];
random.nextBytes(src);
rawUncompressedBaos.write(src);
ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream();
ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream();
for (int j = 0; j < members; j++) {
byte[] src = new byte[rnd.nextInt(8192) + 1];
rnd.nextBytes(src);
srcBAOS.write(src);
try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) {
gzos.write(src);
}
}
byte[] srcBytes = srcBAOS.toByteArray();
byte[] dstBytes = dstBAOS.toByteArray();
// try different size of buffer to read the
// GZIPInputStream
/* just for fun when running manually
for (int j = 1; j < 10; j++) {
test(srcBytes, dstBytes, j);
}
*/
for (int j = 0; j < 10; j++) {
int readBufSZ = rnd.nextInt(2048) + 1;
test(srcBytes,
dstBytes,
readBufSZ,
512); // the defualt buffer size
test(srcBytes,
dstBytes,
readBufSZ,
rnd.nextInt(4096) + 1);
try (GZIPOutputStream gzos = new GZIPOutputStream(gzipCompressedBaos)) {
gzos.write(src);
}
}
final byte[] uncompressedRawBytes = rawUncompressedBaos.toByteArray();
final byte[] gzipCompressedBytes = gzipCompressedBaos.toByteArray();
// decompress using GZIPInputStream and verify the decompressed output.
// use different input buffer size for GZIPInputStream when running the verification.
for (int j = 0; j < 10; j++) {
final int readBufSZ = random.nextInt(2048) + 1;
verifyDecompressed(uncompressedRawBytes,
gzipCompressedBytes,
readBufSZ,
512); // the default input buffer size
verifyDecompressed(uncompressedRawBytes,
gzipCompressedBytes,
readBufSZ,
random.nextInt(4096) + 1);
}
}
private static void test(byte[] src, byte[] dst,
int readBufSize, int gzisBufSize)
throws Throwable
{
try (ByteArrayInputStream bais = new ByteArrayInputStream(dst);
GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize))
{
byte[] result = new byte[src.length + 10];
/*
 * Builds GZIP content consisting of one valid member followed by a few arbitrary
 * bytes that do not represent a GZIP member. The content is then decompressed with
 * GZIPInputStream and the test verifies that exactly the member's original data is
 * produced and that the stream keeps reporting EOF afterwards.
 */
@Test
void testNonMemberAfterTrailer() throws Exception {
    final byte[] rawUncompressed = new byte[random.nextInt(1234)];
    random.nextBytes(rawUncompressed);

    // one valid GZIP member
    final ByteArrayOutputStream gzipCompressedPlusExtra = new ByteArrayOutputStream();
    try (GZIPOutputStream memberOut = new GZIPOutputStream(gzipCompressedPlusExtra)) {
        memberOut.write(rawUncompressed);
    }
    final int memberLength = gzipCompressedPlusExtra.size();

    // after the member's trailer, append bytes that don't represent a GZIP member
    final ByteBuffer extra = ByteBuffer.allocate(Integer.BYTES);
    extra.putInt(GZIPInputStream.GZIP_MAGIC + 42);
    final byte[] notGZIPMagic = extra.array();
    gzipCompressedPlusExtra.write(notGZIPMagic);
    assertEquals(memberLength + notGZIPMagic.length, gzipCompressedPlusExtra.size(),
            "unexpected number of compressed + extra bytes");

    // decompress the member plus the extra bytes; the extra bytes must not
    // contribute anything to the decompressed output
    final ByteArrayOutputStream decompressedBaos = new ByteArrayOutputStream();
    try (ByteArrayInputStream bais = new ByteArrayInputStream(gzipCompressedPlusExtra.toByteArray());
         GZIPInputStream gzipIn = new GZIPInputStream(bais)) {
        final byte[] chunk = new byte[42];
        for (int count = gzipIn.read(chunk); count != -1; count = gzipIn.read(chunk)) {
            decompressedBaos.write(chunk, 0, count);
        }

        final byte[] decompressed = decompressedBaos.toByteArray();
        assertEquals(rawUncompressed.length, decompressed.length,
                "unexpected number of decompressed bytes");
        assertArrayEquals(rawUncompressed, decompressed, "unexpected decompressed data");

        // once EOF has been reported, further reads must keep reporting it
        assertEquals(-1, gzipIn.read(), "unexpected return from read(), expected EOF");
        assertEquals(-1, gzipIn.read(new byte[10]), "unexpected return from read(), expected EOF");
    }
}
/*
 * Verifies that the InputStream.available() method is invoked on the underlying InputStream
 * while GZIPInputStream decompresses content made of two concatenated GZIP members, and
 * that the decompressed output is the data of the first member followed by the data of
 * the second member.
 */
@Test
void testInputStreamAvailableCalled() throws Exception {
    final byte[] rawUncompressedMember1 = new byte[random.nextInt(111)];
    random.nextBytes(rawUncompressedMember1);
    System.err.println("GZIP member 1 has " + rawUncompressedMember1.length + " bytes");
    final byte[] rawUncompressedMember2 = new byte[random.nextInt(33)];
    random.nextBytes(rawUncompressedMember2);
    System.err.println("GZIP member 2 has " + rawUncompressedMember2.length + " bytes");

    // Generate GZIP format data with 2 valid GZIP members. A GZIPOutputStream writes one
    // header and one trailer, so each payload needs its own GZIPOutputStream; writing both
    // payloads through a single stream would produce just one member. Closing the
    // GZIPOutputStream also closes the ByteArrayOutputStream, which has no effect, so the
    // second member is simply appended after the first.
    final ByteArrayOutputStream twoMemberGzipCompressedBaos = new ByteArrayOutputStream();
    try (GZIPOutputStream gzos = new GZIPOutputStream(twoMemberGzipCompressedBaos)) {
        gzos.write(rawUncompressedMember1); // first member
    }
    try (GZIPOutputStream gzos = new GZIPOutputStream(twoMemberGzipCompressedBaos)) {
        gzos.write(rawUncompressedMember2); // second member
    }
    final byte[] gzipCompressed = twoMemberGzipCompressedBaos.toByteArray();

    final AtomicBoolean availableInvoked = new AtomicBoolean();
    // an InputStream which tracks the calls to available()
    final ByteArrayInputStream underlying = new ByteArrayInputStream(gzipCompressed) {
        @Override
        public int available() {
            availableInvoked.set(true);
            return super.available();
        }
    };

    // now use GZIPInputStream to decompress the compressed data and expect the decompressed
    // data to be correct and also expect the InputStream.available() to have been invoked
    final ByteArrayOutputStream decompressedBaos = new ByteArrayOutputStream();
    try (GZIPInputStream gzipIn = new GZIPInputStream(underlying)) {
        final byte[] tmpBuf = new byte[1024];
        int n;
        while ((n = gzipIn.read(tmpBuf)) != -1) {
            decompressedBaos.write(tmpBuf, 0, n);
        }
        assertTrue(availableInvoked.get(), "InputStream.available() wasn't invoked");

        final byte[] decompressed = decompressedBaos.toByteArray();
        // verify the decompressed content, it should represent the two GZIP members
        assertEquals(rawUncompressedMember1.length + rawUncompressedMember2.length,
                decompressed.length, "unexpected number of decompressed bytes");
        assertArrayEquals(rawUncompressedMember1,
                Arrays.copyOfRange(decompressed, 0, rawUncompressedMember1.length),
                "unexpected decompressed data of first member");
        assertArrayEquals(rawUncompressedMember2,
                Arrays.copyOfRange(decompressed, rawUncompressedMember1.length, decompressed.length),
                "unexpected decompressed data of second member");

        // make sure additional calls to read still return EOF
        assertEquals(-1, gzipIn.read(), "unexpected return from read(), expected EOF");
        assertEquals(-1, gzipIn.read(new byte[42]), "unexpected return from read(), expected EOF");
    }
}
// verify that decompressing the gzipCompressed data using GZIPInputStream
// generates the expected output
private static void verifyDecompressed(final byte[] rawUncompressed,
final byte[] gzipCompressed,
final int readBufSize, final int gzisBufSize)
throws IOException {
try (ByteArrayInputStream bais = new ByteArrayInputStream(gzipCompressed);
GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize)) {
byte[] result = new byte[rawUncompressed.length + 10];
byte[] buf = new byte[readBufSize];
int n = 0;
int off = 0;
int numDecompressed = 0;
while ((n = gzis.read(buf, 0, buf.length)) != -1) {
System.arraycopy(buf, 0, result, off, n);
off += n;
System.arraycopy(buf, 0, result, numDecompressed, n);
numDecompressed += n;
// no range check, if overflow, let it fail
}
if (off != src.length || gzis.available() != 0 ||
!Arrays.equals(src, Arrays.copyOf(result, off))) {
throw new RuntimeException(
"GZIPInputStream reading failed! " +
", src.len=" + src.length +
", read=" + off);
}
assertEquals(rawUncompressed.length, numDecompressed,
"unexpected number of decompressed bytes");
assertEquals(0, gzis.available(),
"unexpected additional bytes available in the GZIPInputStream");
assertArrayEquals(rawUncompressed, Arrays.copyOf(result, numDecompressed),
"unexpected decompressed data");
}
}
}