/* * Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package java.util.zip; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CharacterCodingException; import java.nio.charset.CodingErrorAction; import java.util.Arrays; import jdk.internal.util.ArraysSupport; import sun.nio.cs.UTF_8; /** * Utility class for zipfile name and comment decoding and encoding */ class ZipCoder { private static final jdk.internal.access.JavaLangAccess JLA = jdk.internal.access.SharedSecrets.getJavaLangAccess(); // Encoding/decoding is stateless, so make it singleton. static final UTF8ZipCoder UTF8 = new UTF8ZipCoder(UTF_8.INSTANCE); public static ZipCoder get(Charset charset) { if (charset == UTF_8.INSTANCE) { return UTF8; } return new ZipCoder(charset); } /** * This enum represents the three possible return values for * {@link #compare(String, byte[], int, int, boolean)} when * this method compares a lookup name to a string encoded in the * CEN byte array. */ enum Comparison { /** * The lookup string is exactly equal * to the encoded string. */ EXACT_MATCH, /** * The lookup string and the encoded string differs only * by the encoded string having a trailing '/' character. */ DIRECTORY_MATCH, /** * The lookup string and the encoded string do not match. * (They are neither an exact match or a directory match.) */ NO_MATCH } String toString(byte[] ba, int off, int length) { try { return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString(); } catch (CharacterCodingException x) { throw new IllegalArgumentException(x); } } String toString(byte[] ba, int length) { return toString(ba, 0, length); } String toString(byte[] ba) { return toString(ba, 0, ba.length); } byte[] getBytes(String s) { try { ByteBuffer bb = encoder().encode(CharBuffer.wrap(s)); int pos = bb.position(); int limit = bb.limit(); if (bb.hasArray() && pos == 0 && limit == bb.capacity()) { return bb.array(); } byte[] bytes = new byte[bb.limit() - bb.position()]; bb.get(bytes); return bytes; } catch (CharacterCodingException x) { throw new IllegalArgumentException(x); } } static String toStringUTF8(byte[] ba, int len) { return UTF8.toString(ba, 0, len); } boolean isUTF8() { return false; } // Hash code functions for ZipFile entry names. We generate the hash as-if // we first decoded the byte sequence to a String, then appended '/' if no // trailing slash was found, then called String.hashCode(). This // normalization ensures we can simplify and speed up lookups. // // Does encoding error checking and hashing in a single pass for efficiency. // On an error, this function will throw CharacterCodingException while the // UTF8ZipCoder override will throw IllegalArgumentException, so we declare // throws Exception to keep things simple. int checkedHash(byte[] a, int off, int len) throws Exception { if (len == 0) { return 0; } int h = 0; // cb will be a newly allocated CharBuffer with pos == 0, // arrayOffset == 0, backed by an array. CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len)); int limit = cb.limit(); char[] decoded = cb.array(); for (int i = 0; i < limit; i++) { h = 31 * h + decoded[i]; } if (limit > 0 && decoded[limit - 1] != '/') { h = 31 * h + '/'; } return h; } // Hash function equivalent of checkedHash for String inputs static int hash(String name) { int hsh = name.hashCode(); int len = name.length(); if (len > 0 && name.charAt(len - 1) != '/') { hsh = hsh * 31 + '/'; } return hsh; } boolean hasTrailingSlash(byte[] a, int end) { byte[] slashBytes = slashBytes(); return end >= slashBytes.length && Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1; } private byte[] slashBytes; private final Charset cs; protected CharsetDecoder dec; private CharsetEncoder enc; private ZipCoder(Charset cs) { this.cs = cs; } protected CharsetDecoder decoder() { if (dec == null) { dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); } return dec; } private CharsetEncoder encoder() { if (enc == null) { enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); } return enc; } // This method produces an array with the bytes that will correspond to a // trailing '/' in the chosen character encoding. // // While in most charsets a trailing slash will be encoded as the byte // value of '/', this does not hold in the general case. E.g., in charsets // such as UTF-16 and UTF-32 it will be represented by a sequence of 2 or 4 // bytes, respectively. private byte[] slashBytes() { if (slashBytes == null) { // Take into account charsets that produce a BOM, e.g., UTF-16 byte[] slash = "/".getBytes(cs); byte[] doubleSlash = "//".getBytes(cs); slashBytes = Arrays.copyOfRange(doubleSlash, slash.length, doubleSlash.length); } return slashBytes; } /** * This method is used by ZipFile.Source.getEntryPos when comparing the * name being looked up to candidate names encoded in the CEN byte * array. * * Since ZipCode.getEntry supports looking up a "dir/" entry by * the name "dir", this method can optionally distinguish an * exact match from a partial "directory match" (where names only * differ by the encoded name having an additional trailing '/') * * The return values of this method are as follows: * * If the lookup name is exactly equal to the encoded string, return * {@link Comparison#EXACT_MATCH}. * * If the parameter {@code matchDirectory} is {@code true} and the * two strings differ only by the encoded string having an extra * trailing '/' character, then return {@link Comparison#DIRECTORY_MATCH}. * * Otherwise, return {@link Comparison#NO_MATCH} * * While a general implementation will need to decode bytes into a * String for comparison, this can be avoided if the String coder * and this ZipCoder are known to encode strings to the same bytes. * * @param str The lookup string to compare with the encoded string. * @param b The byte array holding the encoded string * @param off The offset into the array where the encoded string starts * @param len The length of the encoded string in bytes * @param matchDirectory If {@code true} and the strings do not match exactly, * a directory match will also be tested * */ Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) { String decoded = toString(b, off, len); if (decoded.startsWith(str)) { if (decoded.length() == str.length()) { return Comparison.EXACT_MATCH; } else if (matchDirectory && decoded.length() == str.length() + 1 && decoded.endsWith("/") ) { return Comparison.DIRECTORY_MATCH; } } return Comparison.NO_MATCH; } static final class UTF8ZipCoder extends ZipCoder { private UTF8ZipCoder(Charset utf8) { super(utf8); } @Override boolean isUTF8() { return true; } @Override String toString(byte[] ba, int off, int length) { return JLA.newStringUTF8NoRepl(ba, off, length); } @Override byte[] getBytes(String s) { return JLA.getBytesUTF8NoRepl(s); } @Override int checkedHash(byte[] a, int off, int len) throws Exception { if (len == 0) { return 0; } int end = off + len; int asciiLen = JLA.countPositives(a, off, len); if (asciiLen != len) { // Non-ASCII, fall back to decoding a String // We avoid using decoder() here since the UTF8ZipCoder is // shared and that decoder is not thread safe. // We use the JLA.newStringUTF8NoRepl variant to throw // exceptions eagerly when opening ZipFiles return hash(JLA.newStringUTF8NoRepl(a, off, len)); } // T_BOOLEAN to treat the array as unsigned bytes, in line with StringLatin1.hashCode int h = ArraysSupport.vectorizedHashCode(a, off, len, 0, ArraysSupport.T_BOOLEAN); if (a[end - 1] != '/') { h = 31 * h + '/'; } return h; } @Override boolean hasTrailingSlash(byte[] a, int end) { return end > 0 && a[end - 1] == '/'; } @Override Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) { try { byte[] encoded = JLA.getBytesNoRepl(str, UTF_8.INSTANCE); int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len); if (mismatch == -1) { return Comparison.EXACT_MATCH; } else if (matchDirectory && len == mismatch + 1 && hasTrailingSlash(b, off + len)) { return Comparison.DIRECTORY_MATCH; } else { return Comparison.NO_MATCH; } } catch (CharacterCodingException e) { return Comparison.NO_MATCH; } } } }