/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileSystem;
import org.apache.commons.io.StandardLineSeparator;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * An instance keeps a byte channel to the file open until {@link #close()} is called.
 * </p>
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    /**
     * Builds a new {@link ReversedLinesFileReader} instance.
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
     *   .setPath(path)
     *   .setBufferSize(4096)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @since 2.12.0
     */
    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

        /**
         * Constructs a new builder whose buffer size, and default buffer size, is the block size of the current
         * {@link FileSystem}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
            setBufferSize(DEFAULT_BLOCK_SIZE);
        }

        /**
         * Constructs a new {@link ReversedLinesFileReader} from this builder's origin (as a {@link Path}), buffer
         * size (used as the block size) and charset.
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot be converted to a Path.
         * @throws IOException                   if an I/O error occurs while opening or reading the file.
         */
        @Override
        public ReversedLinesFileReader get() throws IOException {
            return new ReversedLinesFileReader(getOrigin().getPath(), getBufferSize(), getCharset());
        }

    }

    /**
     * One block of the file plus the bytes carried over from the block that follows it in the file (which was
     * processed before it, since the file is read backwards). Part number {@code 1} is the first block of the file
     * and therefore the last part to be processed.
     */
    private class FilePart {

        /** 1-based number of this block within the file; 0 only for an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the previously processed part. */
        private final byte[] data;

        /** Bytes at the start of this part that could not be turned into a line yet, or null. */
        private byte[] leftOver;

        /** Index in {@link #data} of the last byte not yet returned as part of a line; -1 when exhausted. */
        private int currentLastBytePos;

        /**
         * Constructs a new instance.
         *
         * @param no                     the part number
         * @param length                 its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException           if there is a problem reading the file
         * @throws IllegalStateException if fewer than {@code length} bytes could be read from the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
            this.data = new byte[length + leftOverLength];

            // read data
            if (no > 0 /* file not empty */) {
                channel.position((no - 1) * blockSize);
                final ByteBuffer buffer = ByteBuffer.wrap(data, 0, length);
                // A single read may legally deliver fewer bytes than requested, so keep reading until the
                // buffer is full or the channel stops delivering bytes (end of file).
                while (buffer.hasRemaining()) {
                    if (channel.read(buffer) <= 0) {
                        break;
                    }
                }
                if (buffer.hasRemaining()) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Creates the buffer containing any leftover bytes, i.e. everything from the start of {@link #data} up to
         * and including {@link #currentLastBytePos}, and marks this part as exhausted.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = Arrays.copyOf(data, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence that ends at the given offset and returns its length. The sequences are
         * tried in the order of {@code newLineSequences}.
         *
         * @param data buffer to scan
         * @param i    offset in the buffer of the last byte of the candidate sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                // index of the first byte of the candidate; negative means the sequence does not fit
                final int start = i - (newLineSequence.length - 1);
                boolean match = start >= 0;
                for (int j = 0; match && j < newLineSequence.length; j++) {
                    match = data[start + j] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }

        /**
         * Reads the next line of this part, scanning backwards from {@link #currentLastBytePos}.
         *
         * @return the line or null if this part holds no further complete line
         */
        private String readLine() { //NOPMD Bug in PMD

            String line = null;
            int newLineMatchByteCount;

            // part 1 is the start of the file, hence the last part that is processed
            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

                    line = new String(lineData, charset);

                    // continue before the newline sequence on the next call
                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, charset);
                leftOver = null;
            }

            return line;
        }

        /**
         * Handles block rollover: creates the part for the preceding block of the file, handing over the left
         * over bytes of this part.
         *
         * @return the new FilePart or null if this was the first block of the file
         * @throws IOException           if there was a problem reading the file
         * @throws IllegalStateException if this part still has unread bytes, or if the first block of the file
         *                               ends up with left over bytes
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, charset));
            }
            return null;
        }
    }

    private static final String EMPTY_STRING = "";

    /** Default block size: the block size of the current {@link FileSystem}. */
    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();

    /**
     * Names of the multi-byte charsets that are scanned one byte at a time (like UTF-8). They are looked up lazily
     * and only if the running JVM supports them.
     */
    private static final String[] SINGLE_BYTE_STEP_CHARSET_NAMES = {
        "Shift_JIS", // Same as for UTF-8, http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
        "windows-31j", // Windows code page 932 (Japanese)
        "x-windows-949", // Windows code page 949 (Korean)
        "gbk", // Windows code page 936 (Simplified Chinese)
        "x-windows-950" // Windows code page 950 (Traditional Chinese)
    };

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Determines by how many bytes the backwards scan for newlines moves per step for the given charset.
     *
     * @param charset the (non-null) charset of the file
     * @return the number of bytes to step back while scanning
     * @throws UnsupportedEncodingException if the charset is UTF-16 without byte order or otherwise not supported
     */
    private static int getByteDecrement(final Charset charset) throws UnsupportedEncodingException {
        final CharsetEncoder charsetEncoder = charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(charset)) {
            // all one byte encodings are no problem
            return 1;
        }
        for (final String name : SINGLE_BYTE_STEP_CHARSET_NAMES) {
            // isSupported() first: forName() throws if the running JVM does not ship the charset
            if (Charset.isSupported(name) && Charset.forName(name).equals(charset)) {
                return 1;
            }
        }
        if (StandardCharsets.UTF_16BE.equals(charset) || StandardCharsets.UTF_16LE.equals(charset)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte
            // sequences,
            // however byte order has to be specified
            return 2;
        }
        if (StandardCharsets.UTF_16.equals(charset)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
        }
        throw new UnsupportedEncodingException(
                "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
    }

    private final int blockSize;
    private final Charset charset;
    private final SeekableByteChannel channel;
    private final long totalByteLength;
    private final long totalBlockCount;
    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;
    /** The part currently being read; null once the start of the file has been reached. */
    private FilePart currentFilePart;
    private boolean trailingNewlineOfFileSkipped;

    /**
     * Creates a ReversedLinesFileReader with the default block size (the block size of the current
     * {@link FileSystem}) and the platform's default encoding.
     *
     * @param file the file to be read
     * @throws IOException if an I/O error occurs.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Creates a ReversedLinesFileReader with the default block size (the block size of the current
     * {@link FileSystem}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.5
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException              if an I/O error occurs.
     * @throws IllegalArgumentException if {@code blockSize} is not positive.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws IllegalArgumentException                     if {@code blockSize} is not positive.
     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
     *                                                      {@link UnsupportedEncodingException}
     *                                                      in version 2.2 if the
     *                                                      encoding is not
     *                                                      supported.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }

    /**
     * Creates a ReversedLinesFileReader with the default block size (the block size of the current
     * {@link FileSystem}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * The file is opened for reading and its last block is loaded immediately. If that fails, the file is closed
     * again before the exception is propagated.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException                  if an I/O error occurs.
     * @throws UnsupportedEncodingException if the encoding is UTF-16 without byte order, or is not supported by
     *                                      this class.
     * @throws IllegalArgumentException     if {@code blockSize} is not positive.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
        if (blockSize <= 0) {
            // reject before opening the file: a zero or negative block size cannot partition the file
            throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
        }
        this.blockSize = blockSize;
        this.charset = Charsets.toCharset(charset);

        // --- check & prepare encoding ---
        this.byteDecrement = getByteDecrement(this.charset);

        // NOTE: The new line sequences are matched in the order given, so it is
        // important that \r\n is BEFORE \n
        this.newLineSequences = new byte[][] {
            StandardLineSeparator.CRLF.getBytes(this.charset),
            StandardLineSeparator.LF.getBytes(this.charset),
            StandardLineSeparator.CR.getBytes(this.charset)
        };

        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // Open file
        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
        try {
            final long byteLength = channel.size();
            int lastBlockLength = (int) (byteLength % blockSize);
            long blockCount = byteLength / blockSize;
            if (lastBlockLength > 0) {
                // trailing partial block
                blockCount++;
            } else if (byteLength > 0) {
                // file length is an exact multiple of the block size: the last block is a full one
                lastBlockLength = blockSize;
            }
            this.totalByteLength = byteLength;
            this.totalBlockCount = blockCount;
            this.currentFilePart = new FilePart(blockCount, lastBlockLength, null);
        } catch (final IOException | RuntimeException e) {
            // do not leak the open channel when construction fails
            try {
                channel.close();
            } catch (final IOException closeException) {
                e.addSuppressed(closeException);
            }
            throw e;
        }
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws IllegalArgumentException                     if {@code blockSize} is not positive.
     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
     *                                                      {@link UnsupportedEncodingException}
     *                                                      in version 2.2 if the
     *                                                      encoding is not
     *                                                      supported.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Once the start of the file has been reached, this method keeps returning {@code null}.
     * </p>
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs.
     */
    public String readLine() throws IOException {
        if (currentFilePart == null) {
            // the start of the file was already reached by an earlier call
            return null;
        }

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more FileParts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }

        // aligned behavior with BufferedReader that doesn't return a last, empty line
        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }

        return line;
    }

    /**
     * Returns {@code lineCount} lines of the file from bottom to top.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     * <p>
     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A new list
     * @throws IOException              if an I/O error occurs.
     * @throws IllegalArgumentException if {@code lineCount} is negative.
     * @since 2.8.0
     */
    public List<String> readLines(final int lineCount) throws IOException {
        if (lineCount < 0) {
            throw new IllegalArgumentException("lineCount < 0");
        }
        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
        for (int i = 0; i < lineCount; i++) {
            final String line = readLine();
            if (line == null) {
                return arrayList;
            }
            arrayList.add(line);
        }
        return arrayList;
    }

    /**
     * Returns the last {@code lineCount} lines of the file, in file order, each followed by
     * {@link System#lineSeparator()}.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A String, empty if no line could be read.
     * @throws IOException              if an I/O error occurs.
     * @throws IllegalArgumentException if {@code lineCount} is negative.
     * @since 2.8.0
     */
    public String toString(final int lineCount) throws IOException {
        final List<String> lines = readLines(lineCount);
        Collections.reverse(lines);
        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
    }

}