diff --git a/jdk/src/java.base/share/classes/java/nio/file/FileChannelLinesSpliterator.java b/jdk/src/java.base/share/classes/java/nio/file/FileChannelLinesSpliterator.java
new file mode 100644
index 00000000000..dab526e8d2b
--- /dev/null
+++ b/jdk/src/java.base/share/classes/java/nio/file/FileChannelLinesSpliterator.java
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package java.nio.file;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Spliterator;
+import java.util.function.Consumer;
+
+/**
+ * A file-based lines spliterator, leveraging a shared mapped byte buffer and
+ * associated file channel, covering lines of a file for character encodings
+ * where line feed characters can be easily identified from character encoded
+ * bytes.
+ *
+ * <p>
+ * When the root spliterator is first split a mapped byte buffer will be created
+ * over the file for its size that was observed when the stream was created.
+ * Thus a mapped byte buffer is only required for parallel stream execution.
+ * Sub-spliterators will share that mapped byte buffer. Splitting will use the
+ * mapped byte buffer to find the closest line feed character(s) to the left or
+ * right of the mid-point of the covered range of bytes of the file. If a line
+ * feed is found then the spliterator is split with the returned spliterator
+ * containing the identified line feed character(s) at the end of its covered
+ * range of bytes.
+ *
+ * <p>
+ * Traversing will create a buffered reader, derived from the file channel, for
+ * the range of bytes of the file. The lines are then read from that buffered
+ * reader. Once traversing commences no further splitting can be performed and
+ * the reference to the mapped byte buffer will be set to null.
+ */
+final class FileChannelLinesSpliterator implements Spliterator<String> {
+
+    /**
+     * Names of the charsets this spliterator supports, that is, those whose
+     * encoded line feed and carriage return bytes can be identified when
+     * randomly accessing the bytes of a file.
+     */
+    static final Set<String> SUPPORTED_CHARSET_NAMES;
+    static {
+        SUPPORTED_CHARSET_NAMES = new HashSet<>();
+        SUPPORTED_CHARSET_NAMES.add(StandardCharsets.UTF_8.name());
+        SUPPORTED_CHARSET_NAMES.add(StandardCharsets.ISO_8859_1.name());
+        SUPPORTED_CHARSET_NAMES.add(StandardCharsets.US_ASCII.name());
+    }
+
+    private final FileChannel fc;
+    private final Charset cs;
+    // File position of the next byte to be covered (inclusive)
+    private int index;
+    // File position one past the last byte covered (exclusive)
+    private final int fence;
+
+    // Null before first split, non-null when splitting, null when traversing
+    private ByteBuffer buffer;
+    // Non-null when traversing
+    private BufferedReader reader;
+
+    /**
+     * Creates a spliterator covering the bytes {@code [index, fence)} of the
+     * file. No mapped byte buffer exists until the first split.
+     *
+     * @param fc the channel to read from
+     * @param cs the charset used to decode the covered bytes
+     * @param index the position of the first covered byte
+     * @param fence the position one past the last covered byte
+     */
+    FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
+        this(fc, cs, index, fence, null);
+    }
+
+    /**
+     * Creates a spliterator covering the bytes {@code [index, fence)} that
+     * shares an already mapped byte buffer (used for the result of a split).
+     */
+    private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
+        this.fc = fc;
+        this.buffer = buffer;
+        this.cs = cs;
+        this.index = index;
+        this.fence = fence;
+    }
+
+    /**
+     * Passes the next line, if any, to the action. The first call starts
+     * traversal, after which this spliterator can no longer be split.
+     */
+    @Override
+    public boolean tryAdvance(Consumer<? super String> action) {
+        String line = readLine();
+        if (line != null) {
+            action.accept(line);
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Passes every remaining line to the action, in file order.
+     */
+    @Override
+    public void forEachRemaining(Consumer<? super String> action) {
+        String line;
+        while ((line = readLine()) != null) {
+            action.accept(line);
+        }
+    }
+
+    /**
+     * Creates a buffered reader that decodes, with a new decoder of the
+     * charset, only the bytes between {@code index} and {@code fence}.
+     */
+    private BufferedReader getBufferedReader() {
+        // A readable byte channel that reads bytes from the underlying
+        // file channel over the covered range, advancing index as it goes.
+        ReadableByteChannel rrbc = new ReadableByteChannel() {
+            @Override
+            public int read(ByteBuffer dst) throws IOException {
+                int bytesToRead = fence - index;
+                if (bytesToRead == 0)
+                    return -1;
+
+                int bytesRead;
+                if (bytesToRead < dst.remaining()) {
+                    // The number of bytes to read is less than remaining
+                    // bytes in the buffer
+                    // Snapshot the limit, reduce it, read, then restore.
+                    // The restore is in a finally block so that the caller's
+                    // buffer is left intact even if the read fails.
+                    int oldLimit = dst.limit();
+                    dst.limit(dst.position() + bytesToRead);
+                    try {
+                        bytesRead = fc.read(dst, index);
+                    } finally {
+                        dst.limit(oldLimit);
+                    }
+                } else {
+                    bytesRead = fc.read(dst, index);
+                }
+                if (bytesRead == -1) {
+                    // The file ended before the fence: nothing more to cover
+                    index = fence;
+                    return bytesRead;
+                }
+
+                index += bytesRead;
+                return bytesRead;
+            }
+
+            @Override
+            public boolean isOpen() {
+                return fc.isOpen();
+            }
+
+            @Override
+            public void close() throws IOException {
+                fc.close();
+            }
+        };
+        return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1));
+    }
+
+    /**
+     * Reads the next line, creating the reader on first use.
+     *
+     * @return the next line, or {@code null} when the covered range is exhausted
+     * @throws UncheckedIOException if reading from the file fails
+     */
+    private String readLine() {
+        if (reader == null) {
+            reader = getBufferedReader();
+            // Traversal has started: drop the reference to the mapped buffer
+            buffer = null;
+        }
+
+        try {
+            return reader.readLine();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Maps the file read-only from position 0 up to {@code fence}.
+     *
+     * @throws UncheckedIOException if the file cannot be mapped
+     */
+    private ByteBuffer getMappedByteBuffer() {
+        // TODO can the mapped byte buffer be explicitly unmapped?
+        // It's possible, via a shared-secret mechanism, when either
+        // 1) the spliterator starts traversing, although traversal can
+        //    happen concurrently for multiple spliterators, so care is
+        //    needed in this case; or
+        // 2) when the stream is closed using some shared holder to pass
+        //    the mapped byte buffer when it is created.
+        try {
+            return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Splits off the left part of the covered range at the line separator
+     * closest to the mid point, leaving this spliterator with the right part.
+     *
+     * @return a spliterator covering the left part, which ends with the line
+     *         separator found, or {@code null} if traversal has started, the
+     *         range is too small to split, or no usable separator was found
+     */
+    @Override
+    public Spliterator<String> trySplit() {
+        // Cannot split after partial traverse
+        if (reader != null)
+            return null;
+
+        final int hi = fence, lo = index;
+
+        // A split point must satisfy lo < mid < hi, which needs at least two
+        // covered bytes. Give up before mapping or sampling: for an empty
+        // range the mid point equals hi, a position that is not covered and
+        // that may lie past the end of the mapped buffer.
+        if (hi - lo < 2)
+            return null;
+
+        ByteBuffer b;
+        if ((b = buffer) == null) {
+            b = buffer = getMappedByteBuffer();
+        }
+
+        // Check if line separator hits the mid point
+        int mid = (lo + hi) >>> 1;
+        int c = b.get(mid);
+        if (c == '\n') {
+            mid++;
+        } else if (c == '\r') {
+            // Check if a line separator of "\r\n"
+            if (++mid < hi && b.get(mid) == '\n') {
+                mid++;
+            }
+        } else {
+            // TODO give up after a certain distance from the mid point?
+            // Scan to the left and right of the mid point
+            int midL = mid - 1;
+            int midR = mid + 1;
+            // Zero means "no separator found"; it fails the mid > lo test below
+            mid = 0;
+            while (midL > lo && midR < hi) {
+                // Sample to the left
+                c = b.get(midL--);
+                if (c == '\n' || c == '\r') {
+                    // If c is "\r" then no need to check for "\r\n"
+                    // since the subsequent value was previously checked
+                    mid = midL + 2;
+                    break;
+                }
+
+                // Sample to the right
+                c = b.get(midR++);
+                if (c == '\n' || c == '\r') {
+                    mid = midR;
+                    // Check if line-separator is "\r\n"
+                    if (c == '\r' && mid < hi && b.get(mid) == '\n') {
+                        mid++;
+                    }
+                    break;
+                }
+            }
+        }
+
+        // The left spliterator will have the line-separator at the end
+        return (mid > lo && mid < hi)
+                ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b)
+                : null;
+    }
+
+    @Override
+    public long estimateSize() {
+        // Use the number of bytes as an estimate.
+        // We could divide by a constant that is the average number of
+        // characters per-line, but that constant will be factored out.
+        return fence - index;
+    }
+
+    @Override
+    public long getExactSizeIfKnown() {
+        // The number of lines is not known without reading the bytes
+        return -1;
+    }
+
+    @Override
+    public int characteristics() {
+        return Spliterator.ORDERED | Spliterator.NONNULL;
+    }
+}
diff --git a/jdk/src/java.base/share/classes/java/nio/file/Files.java b/jdk/src/java.base/share/classes/java/nio/file/Files.java
index 7afd40bf12a..079361eb088 100644
--- a/jdk/src/java.base/share/classes/java/nio/file/Files.java
+++ b/jdk/src/java.base/share/classes/java/nio/file/Files.java
@@ -38,6 +38,7 @@ import java.io.Reader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
@@ -3735,6 +3736,7 @@ public final class Files {
}
}
+
/**
* Read all lines from a file as a {@code Stream}. Unlike {@link
* #readAllLines(Path, Charset) readAllLines}, this method does not read
@@ -3748,6 +3750,10 @@ public final class Files {
* <p> The returned stream contains a reference to an open file. The file
* is closed by closing the stream.
*
+ * <p> The file contents should not be modified during the execution of the
+ * terminal stream operation. Otherwise, the result of the terminal stream
+ * operation is undefined.
+ *
* <p> After this method returns, then any subsequent I/O exception that
* occurs while reading from the file or when a malformed or unmappable byte
* sequence is read, is wrapped in an {@link UncheckedIOException} that will
@@ -3761,6 +3767,30 @@ public final class Files {
* control structure to ensure that the stream's open file is closed promptly
* after the stream's operations have completed.
*
+ * @implNote
+ * This implementation supports good parallel stream performance for the
+ * standard charsets {@link StandardCharsets#UTF_8 UTF-8},
+ * {@link StandardCharsets#US_ASCII US-ASCII} and
+ * {@link StandardCharsets#ISO_8859_1 ISO-8859-1}. Such
+ * line-optimal charsets have the property that the encoded bytes
+ * of a line feed ('\n') or a carriage return ('\r') are efficiently
+ * identifiable from other encoded characters when randomly accessing the
+ * bytes of the file.
+ *
+ * <p>For non-line-optimal charsets the stream source's
+ * spliterator has poor splitting properties, similar to that of a
+ * spliterator associated with an iterator or that associated with a stream
+ * returned from {@link BufferedReader#lines()}. Poor splitting properties
+ * can result in poor parallel stream performance.
+ *
+ * <p>For line-optimal charsets the stream source's spliterator
+ * has good splitting properties, assuming the file contains a regular
+ * sequence of lines. Good splitting properties can result in good parallel
+ * stream performance. The spliterator for a line-optimal charset
+ * takes advantage of the charset properties (a line feed or a carriage
+ * return being efficiently identifiable) such that when splitting it can
+ * approximately divide the number of covered lines in half.
+ *
* @param path
* the path to the file
* @param cs
@@ -3781,7 +3811,50 @@ public final class Files {
* @since 1.8
*/
public static Stream lines(Path path, Charset cs) throws IOException {
- BufferedReader br = Files.newBufferedReader(path, cs);
+ // Use the file-channel-based spliterator, which has good splitting properties, if:
+ // 1) the path is associated with the default file system;
+ // 2) the charset name is in FileChannelLinesSpliterator.SUPPORTED_CHARSET_NAMES; and
+ // 3) the file size is such that all bytes can be indexed by int values (this limitation is
+ // imposed by ByteBuffer); createFileChannelLinesStream checks this and returns null otherwise
+ if (path.getFileSystem() == FileSystems.getDefault() &&
+ FileChannelLinesSpliterator.SUPPORTED_CHARSET_NAMES.contains(cs.name())) {
+ FileChannel fc = FileChannel.open(path, StandardOpenOption.READ);
+
+ Stream fcls = createFileChannelLinesStream(fc, cs);
+ if (fcls != null) {
+ return fcls;
+ }
+ fc.close();
+ }
+
+ return createBufferedReaderLinesStream(Files.newBufferedReader(path, cs));
+ }
+
+ /**
+  * Creates a stream of lines backed by a {@link FileChannelLinesSpliterator}
+  * covering the whole file, or returns {@code null} if the size reported by
+  * the channel is zero or exceeds {@link Integer#MAX_VALUE}.
+  *
+  * <p> When {@code null} is returned the channel is left open and the caller
+  * remains responsible for closing it. When an exception is thrown the
+  * channel is closed first; a failure to close it is added as a suppressed
+  * exception.
+  *
+  * @param fc the open channel to read from
+  * @param cs the charset used to decode the bytes of the file
+  * @return a sequential stream that closes {@code fc} when closed, or
+  *         {@code null} if the file-channel-based stream cannot be used
+  * @throws IOException if the size of the file cannot be obtained
+  */
+ private static Stream<String> createFileChannelLinesStream(FileChannel fc, Charset cs) throws IOException {
+     try {
+         // Obtaining the size from the FileChannel is much faster
+         // than obtaining using path.toFile().length()
+         long length = fc.size();
+         // A reported size of zero is rejected as well as a too large one.
+         // The spliterator covers the fixed byte range [0, length), so a
+         // file that reports zero yet has content (as some special files
+         // may) would yield no lines, and an empty range cannot be split.
+         // The BufferedReader based stream reads until end-of-stream instead.
+         if (length > 0 && length <= Integer.MAX_VALUE) {
+             Spliterator<String> s = new FileChannelLinesSpliterator(fc, cs, 0, (int) length);
+             return StreamSupport.stream(s, false)
+                     .onClose(Files.asUncheckedRunnable(fc));
+         }
+     } catch (Error|RuntimeException|IOException e) {
+         try {
+             fc.close();
+         } catch (IOException ex) {
+             try {
+                 e.addSuppressed(ex);
+             } catch (Throwable ignore) {
+                 // Best effort only: the original failure is what gets reported
+             }
+         }
+         throw e;
+     }
+     return null;
+ }
+
+ private static Stream createBufferedReaderLinesStream(BufferedReader br) {
try {
return br.lines().onClose(asUncheckedRunnable(br));
} catch (Error|RuntimeException e) {
@@ -3790,7 +3863,8 @@ public final class Files {
} catch (IOException ex) {
try {
e.addSuppressed(ex);
- } catch (Throwable ignore) {}
+ } catch (Throwable ignore) {
+ }
}
throw e;
}
@@ -3804,6 +3878,10 @@ public final class Files {
* <p> The returned stream contains a reference to an open file. The file
* is closed by closing the stream.
*
+ * <p> The file contents should not be modified during the execution of the
+ * terminal stream operation. Otherwise, the result of the terminal stream
+ * operation is undefined.
+ *
* <p> This method works as if invoking it were equivalent to evaluating the
* expression:
* <pre>{@code
diff --git a/jdk/test/java/nio/file/Files/StreamLinesTest.java b/jdk/test/java/nio/file/Files/StreamLinesTest.java
new file mode 100644
index 00000000000..f0f218138ba
--- /dev/null
+++ b/jdk/test/java/nio/file/Files/StreamLinesTest.java
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+ * @bug 8072773
+ * @library /lib/testlibrary/ ../../../util/stream/bootlib
+ * @build java.util.stream.OpTestCase
+ * @build jdk.testlibrary.RandomFactory
+ * @run testng/othervm StreamLinesTest
+ * @summary Tests streams returned from Files.lines, primarily focused on
+ * testing the file-channel-based stream with supported
+ * character sets
+ * @key randomness
+ */
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Random;
+import java.util.function.IntFunction;
+import java.util.function.Supplier;
+import java.util.stream.OpTestCase;
+import java.util.stream.Stream;
+import java.util.stream.TestData;
+import jdk.testlibrary.RandomFactory;
+
+public class StreamLinesTest extends OpTestCase {
+
+ /**
+  * The line separators written after generated lines; {@code NONE} writes
+  * nothing.
+  */
+ enum LineSeparator {
+     NONE(""),
+     N("\n"),
+     R("\r"),
+     RN("\r\n");
+
+     /** The character sequence written to the file for this separator. */
+     public final String value;
+
+     LineSeparator(String value) {
+         this.value = value;
+     }
+
+     /** Returns the constant's name, not the separator characters. */
+     @Override
+     public String toString() {
+         return name();
+     }
+ }
+
+ /**
+  * Creates a temporary file, deleted on JVM exit, holding {@code lines}
+  * generated lines encoded with the given charset.
+  *
+  * @param lineGenerator supplies the content of the line at a given index
+  * @param lineSeparatorGenerator supplies the separator written after the
+  *        line at a given index
+  * @param lines the number of lines to write
+  * @param cs the charset used to encode the file
+  * @param endLineSep whether a separator is written after the last line
+  * @return the path of the temporary file
+  * @throws IOException if the file cannot be created or written
+  */
+ static Path generateTempFileWithLines(IntFunction<String> lineGenerator,
+                                       IntFunction<LineSeparator> lineSeparatorGenerator,
+                                       int lines, Charset cs, boolean endLineSep) throws IOException {
+     Path p = Files.createTempFile("lines", null);
+     // Register for deletion up front so the file is removed even if writing fails
+     p.toFile().deleteOnExit();
+
+     // try-with-resources closes (and thereby flushes) the writer on every path
+     try (BufferedWriter bw = Files.newBufferedWriter(p, cs)) {
+         for (int i = 0; i < lines - 1; i++) {
+             bw.write(lineGenerator.apply(i));
+             bw.write(lineSeparatorGenerator.apply(i).value);
+         }
+         // The last line is handled apart because its separator is optional
+         if (lines > 0) {
+             bw.write(lineGenerator.apply(lines - 1));
+             if (endLineSep)
+                 bw.write(lineSeparatorGenerator.apply(lines - 1).value);
+         }
+     }
+
+     return p;
+ }
+
+ /**
+  * Appends to the file the separator generated for index {@code lines - 1}.
+  *
+  * @param p the file to append to
+  * @param lineSeparatorGenerator supplies the separator for a given index
+  * @param lines the number of lines; the separator of the last one is written
+  * @param cs the charset used to encode the separator
+  * @throws IOException if the file cannot be opened or written
+  */
+ static void writeLineSeparator(Path p,
+                                IntFunction<LineSeparator> lineSeparatorGenerator,
+                                int lines, Charset cs) throws IOException {
+     // try-with-resources closes (and thereby flushes) the writer on every path
+     try (BufferedWriter bw = Files.newBufferedWriter(p, cs, StandardOpenOption.APPEND)) {
+         bw.write(lineSeparatorGenerator.apply(lines - 1).value);
+     }
+ }
+
+ /**
+  * Reads all lines of the file with {@link BufferedReader#readLine()}.
+  *
+  * @param path the file to read
+  * @param cs the charset used to decode the file
+  * @return the lines of the file, in order
+  * @throws IOException if the file cannot be opened or read
+  */
+ static List<String> readAllLines(Path path, Charset cs) throws IOException {
+     try (BufferedReader reader = Files.newBufferedReader(path, cs)) {
+         List<String> result = new ArrayList<>();
+         String line;
+         while ((line = reader.readLine()) != null) {
+             result.add(line);
+         }
+         return result;
+     }
+ }
+
+ /**
+  * Bundles the arguments, in declaration order, into an {@code Object[]}.
+  */
+ static Object[] of(String description, IntFunction<String> lineGenerator,
+                    IntFunction<LineSeparator> separatorGenerator, int n, Charset cs) {
+     return new Object[]{description, lineGenerator, separatorGenerator, n, cs};
+ }
+
+ private static final Random random = RandomFactory.getRandom();
+
+ @DataProvider
+ public static Object[][] lines() {
+ List