001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.List;
035
036import org.apache.commons.io.Charsets;
037import org.apache.commons.io.FileSystem;
038import org.apache.commons.io.StandardLineSeparator;
039import org.apache.commons.io.build.AbstractStreamBuilder;
040
/**
 * Reads the lines of a file in reverse order (similar to a BufferedReader, but starting at
 * the last line). Useful, for example, for searching in log files.
 *
 * @since 2.2
 */
047public class ReversedLinesFileReader implements Closeable {
048
049    /**
050     * Builds a new {@link ReversedLinesFileReader} instance.
051     * <p>
052     * For example:
053     * </p>
054     * <pre>{@code
055     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
056     *   .setPath(path)
057     *   .setBufferSize(4096)
058     *   .setCharset(StandardCharsets.UTF_8)
059     *   .get()}
060     * </pre>
061     * <p>
062     * @since 2.12.0
063     */
064    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
065
066        public Builder() {
067            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
068            setBufferSize(DEFAULT_BLOCK_SIZE);
069        }
070
071        /**
072         * Constructs a new instance.
073         *
074         * @throws UnsupportedOperationException if the origin cannot be converted to a Path.
075         */
076        @Override
077        public ReversedLinesFileReader get() throws IOException {
078            return new ReversedLinesFileReader(getOrigin().getPath(), getBufferSize(), getCharset());
079        }
080
081    }
082
083    private class FilePart {
084        private final long no;
085
086        private final byte[] data;
087
088        private byte[] leftOver;
089
090        private int currentLastBytePos;
091
092        /**
093         * Constructs a new instance.
094         *
095         * @param no                     the part number
096         * @param length                 its length
097         * @param leftOverOfLastFilePart remainder
098         * @throws IOException if there is a problem reading the file
099         */
100        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
101            this.no = no;
102            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
103            this.data = new byte[dataLength];
104            final long off = (no - 1) * blockSize;
105
106            // read data
107            if (no > 0 /* file not empty */) {
108                channel.position(off);
109                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
110                if (countRead != length) {
111                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
112                }
113            }
114            // copy left over part into data arr
115            if (leftOverOfLastFilePart != null) {
116                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
117            }
118            this.currentLastBytePos = data.length - 1;
119            this.leftOver = null;
120        }
121
122        /**
123         * Creates the buffer containing any leftover bytes.
124         */
125        private void createLeftOver() {
126            final int lineLengthBytes = currentLastBytePos + 1;
127            if (lineLengthBytes > 0) {
128                // create left over for next block
129                leftOver = Arrays.copyOf(data, lineLengthBytes);
130            } else {
131                leftOver = null;
132            }
133            currentLastBytePos = -1;
134        }
135
136        /**
137         * Finds the new-line sequence and return its length.
138         *
139         * @param data buffer to scan
140         * @param i    start offset in buffer
141         * @return length of newline sequence or 0 if none found
142         */
143        private int getNewLineMatchByteCount(final byte[] data, final int i) {
144            for (final byte[] newLineSequence : newLineSequences) {
145                boolean match = true;
146                for (int j = newLineSequence.length - 1; j >= 0; j--) {
147                    final int k = i + j - (newLineSequence.length - 1);
148                    match &= k >= 0 && data[k] == newLineSequence[j];
149                }
150                if (match) {
151                    return newLineSequence.length;
152                }
153            }
154            return 0;
155        }
156
157        /**
158         * Reads a line.
159         *
160         * @return the line or null
161         */
162        private String readLine() { //NOPMD Bug in PMD
163
164            String line = null;
165            int newLineMatchByteCount;
166
167            final boolean isLastFilePart = no == 1;
168
169            int i = currentLastBytePos;
170            while (i > -1) {
171
172                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
173                    // avoidNewlineSplitBuffer: for all except the last file part we
174                    // take a few bytes to the next file part to avoid splitting of newlines
175                    createLeftOver();
176                    break; // skip last few bytes and leave it to the next file part
177                }
178
179                // --- check for newline ---
180                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
181                    final int lineStart = i + 1;
182                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
183
184                    if (lineLengthBytes < 0) {
185                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
186                    }
187                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
188
189                    line = new String(lineData, charset);
190
191                    currentLastBytePos = i - newLineMatchByteCount;
192                    break; // found line
193                }
194
195                // --- move cursor ---
196                i -= byteDecrement;
197
198                // --- end of file part handling ---
199                if (i < 0) {
200                    createLeftOver();
201                    break; // end of file part
202                }
203            }
204
205            // --- last file part handling ---
206            if (isLastFilePart && leftOver != null) {
207                // there will be no line break anymore, this is the first line of the file
208                line = new String(leftOver, charset);
209                leftOver = null;
210            }
211
212            return line;
213        }
214
215        /**
216         * Handles block rollover
217         *
218         * @return the new FilePart or null
219         * @throws IOException if there was a problem reading the file
220         */
221        private FilePart rollOver() throws IOException {
222
223            if (currentLastBytePos > -1) {
224                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
225                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
226            }
227
228            if (no > 1) {
229                return new FilePart(no - 1, blockSize, leftOver);
230            }
231            // NO 1 was the last FilePart, we're finished
232            if (leftOver != null) {
233                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
234                        + new String(leftOver, charset));
235            }
236            return null;
237        }
238    }
239
240    private static final String EMPTY_STRING = "";
241
242    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
243
244    /**
245     * Constructs a new {@link Builder}.
246     *
247     * @return a new {@link Builder}.
248     * @since 2.12.0
249     */
250    public static Builder builder() {
251        return new Builder();
252    }
253
254    private final int blockSize;
255    private final Charset charset;
256    private final SeekableByteChannel channel;
257    private final long totalByteLength;
258    private final long totalBlockCount;
259    private final byte[][] newLineSequences;
260    private final int avoidNewlineSplitBufferSize;
261    private final int byteDecrement;
262    private FilePart currentFilePart;
263    private boolean trailingNewlineOfFileSkipped;
264
265    /**
266     * Creates a ReversedLinesFileReader with default block size of 4KB and the
267     * platform's default encoding.
268     *
269     * @param file the file to be read
270     * @throws IOException if an I/O error occurs.
271     * @deprecated Use {@link ReaderInputStream#builder()} instead
272     */
273    @Deprecated
274    public ReversedLinesFileReader(final File file) throws IOException {
275        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
276    }
277
278    /**
279     * Creates a ReversedLinesFileReader with default block size of 4KB and the
280     * specified encoding.
281     *
282     * @param file    the file to be read
283     * @param charset the charset to use, null uses the default Charset.
284     * @throws IOException if an I/O error occurs.
285     * @since 2.5
286     * @deprecated Use {@link ReaderInputStream#builder()} instead
287     */
288    @Deprecated
289    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
290        this(file.toPath(), charset);
291    }
292
293    /**
294     * Creates a ReversedLinesFileReader with the given block size and encoding.
295     *
296     * @param file      the file to be read
297     * @param blockSize size of the internal buffer (for ideal performance this
298     *                  should match with the block size of the underlying file
299     *                  system).
300     * @param charset  the encoding of the file, null uses the default Charset.
301     * @throws IOException if an I/O error occurs.
302     * @since 2.3
303     * @deprecated Use {@link ReaderInputStream#builder()} instead
304     */
305    @Deprecated
306    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
307        this(file.toPath(), blockSize, charset);
308    }
309
310    /**
311     * Creates a ReversedLinesFileReader with the given block size and encoding.
312     *
313     * @param file      the file to be read
314     * @param blockSize size of the internal buffer (for ideal performance this
315     *                  should match with the block size of the underlying file
316     *                  system).
317     * @param charsetName  the encoding of the file, null uses the default Charset.
318     * @throws IOException                                  if an I/O error occurs
319     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
320     *                                                      {@link UnsupportedEncodingException}
321     *                                                      in version 2.2 if the
322     *                                                      encoding is not
323     *                                                      supported.
324     * @deprecated Use {@link ReaderInputStream#builder()} instead
325     */
326    @Deprecated
327    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
328        this(file.toPath(), blockSize, charsetName);
329    }
330
331    /**
332     * Creates a ReversedLinesFileReader with default block size of 4KB and the
333     * specified encoding.
334     *
335     * @param file    the file to be read
336     * @param charset the charset to use, null uses the default Charset.
337     * @throws IOException if an I/O error occurs.
338     * @since 2.7
339     * @deprecated Use {@link ReaderInputStream#builder()} instead
340     */
341    @Deprecated
342    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
343        this(file, DEFAULT_BLOCK_SIZE, charset);
344    }
345
346    /**
347     * Creates a ReversedLinesFileReader with the given block size and encoding.
348     *
349     * @param file      the file to be read
350     * @param blockSize size of the internal buffer (for ideal performance this
351     *                  should match with the block size of the underlying file
352     *                  system).
353     * @param charset  the encoding of the file, null uses the default Charset.
354     * @throws IOException if an I/O error occurs.
355     * @since 2.7
356     * @deprecated Use {@link ReaderInputStream#builder()} instead
357     */
358    @Deprecated
359    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
360        this.blockSize = blockSize;
361        this.charset = Charsets.toCharset(charset);
362
363        // --- check & prepare encoding ---
364        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
365        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
366        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
367            // all one byte encodings are no problem
368            byteDecrement = 1;
369        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
370        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
371                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
372                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
373                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
374                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
375            byteDecrement = 1;
376        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
377            // UTF-16 new line sequences are not allowed as second tuple of four byte
378            // sequences,
379            // however byte order has to be specified
380            byteDecrement = 2;
381        } else if (this.charset == StandardCharsets.UTF_16) {
382            throw new UnsupportedEncodingException(
383                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
384        } else {
385            throw new UnsupportedEncodingException(
386                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
387        }
388
389        // NOTE: The new line sequences are matched in the order given, so it is
390        // important that \r\n is BEFORE \n
391        this.newLineSequences = new byte[][] {
392            StandardLineSeparator.CRLF.getBytes(this.charset),
393            StandardLineSeparator.LF.getBytes(this.charset),
394            StandardLineSeparator.CR.getBytes(this.charset)
395        };
396
397        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
398
399        // Open file
400        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
401        this.totalByteLength = channel.size();
402        int lastBlockLength = (int) (this.totalByteLength % blockSize);
403        if (lastBlockLength > 0) {
404            this.totalBlockCount = this.totalByteLength / blockSize + 1;
405        } else {
406            this.totalBlockCount = this.totalByteLength / blockSize;
407            if (this.totalByteLength > 0) {
408                lastBlockLength = blockSize;
409            }
410        }
411        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
412
413    }
414
415    /**
416     * Creates a ReversedLinesFileReader with the given block size and encoding.
417     *
418     * @param file        the file to be read
419     * @param blockSize   size of the internal buffer (for ideal performance this
420     *                    should match with the block size of the underlying file
421     *                    system).
422     * @param charsetName the encoding of the file, null uses the default Charset.
423     * @throws IOException                                  if an I/O error occurs
424     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
425     *                                                      {@link UnsupportedEncodingException}
426     *                                                      in version 2.2 if the
427     *                                                      encoding is not
428     *                                                      supported.
429     * @since 2.7
430     * @deprecated Use {@link ReaderInputStream#builder()} instead
431     */
432    @Deprecated
433    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
434        this(file, blockSize, Charsets.toCharset(charsetName));
435    }
436
437    /**
438     * Closes underlying resources.
439     *
440     * @throws IOException if an I/O error occurs.
441     */
442    @Override
443    public void close() throws IOException {
444        channel.close();
445    }
446
447    /**
448     * Returns the lines of the file from bottom to top.
449     *
450     * @return the next line or null if the start of the file is reached
451     * @throws IOException if an I/O error occurs.
452     */
453    public String readLine() throws IOException {
454
455        String line = currentFilePart.readLine();
456        while (line == null) {
457            currentFilePart = currentFilePart.rollOver();
458            if (currentFilePart == null) {
459                // no more FileParts: we're done, leave line set to null
460                break;
461            }
462            line = currentFilePart.readLine();
463        }
464
465        // aligned behavior with BufferedReader that doesn't return a last, empty line
466        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
467            trailingNewlineOfFileSkipped = true;
468            line = readLine();
469        }
470
471        return line;
472    }
473
474    /**
475     * Returns {@code lineCount} lines of the file from bottom to top.
476     * <p>
477     * If there are less than {@code lineCount} lines in the file, then that's what
478     * you get.
479     * </p>
480     * <p>
481     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
482     * </p>
483     *
484     * @param lineCount How many lines to read.
485     * @return A new list
486     * @throws IOException if an I/O error occurs.
487     * @since 2.8.0
488     */
489    public List<String> readLines(final int lineCount) throws IOException {
490        if (lineCount < 0) {
491            throw new IllegalArgumentException("lineCount < 0");
492        }
493        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
494        for (int i = 0; i < lineCount; i++) {
495            final String line = readLine();
496            if (line == null) {
497                return arrayList;
498            }
499            arrayList.add(line);
500        }
501        return arrayList;
502    }
503
504    /**
505     * Returns the last {@code lineCount} lines of the file.
506     * <p>
507     * If there are less than {@code lineCount} lines in the file, then that's what
508     * you get.
509     * </p>
510     *
511     * @param lineCount How many lines to read.
512     * @return A String.
513     * @throws IOException if an I/O error occurs.
514     * @since 2.8.0
515     */
516    public String toString(final int lineCount) throws IOException {
517        final List<String> lines = readLines(lineCount);
518        Collections.reverse(lines);
519        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
520    }
521
522}