/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * The file is processed in blocks of {@code blockSize} bytes, starting with the
 * last block. Bytes at the beginning of a block that do not yet form a complete
 * line are carried over ("left over") and appended to the preceding block.
 * </p>
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    /**
     * One block of the file plus any bytes carried over from the block that
     * follows it in the file. Blocks are numbered from 1 (start of file).
     */
    private class FilePart {
        /** 1-based number of this block; 0 only for an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the following block. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that must be handed to the preceding block. */
        private byte[] leftOver;

        /** Index of the last byte in {@link #data} not yet returned as part of a line; -1 when exhausted. */
        private int currentLastBytePos;

        /**
         * Reads the block from the channel and appends the left over bytes.
         *
         * @param no                     the part number
         * @param length                 its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException           if there is a problem reading the file
         * @throws IllegalStateException if fewer than {@code length} bytes could be read
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                channel.position(off);
                // A single read() is allowed to deliver fewer bytes than requested, so
                // keep reading until the block is complete or no progress is made.
                final ByteBuffer buffer = ByteBuffer.wrap(data, 0, length);
                while (buffer.hasRemaining() && channel.read(buffer) > 0) {
                    // buffer position advances with every successful read
                }
                if (buffer.hasRemaining()) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Creates the buffer containing any left over bytes, i.e. everything from
         * the start of the data up to and including the current position, and
         * marks this part as exhausted.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence that ends at the given offset and returns its length.
         *
         * @param data buffer to scan
         * @param i    offset of the last byte of the candidate new-line sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                // compare backwards; stop at the first mismatch or when running off the buffer start
                for (int j = newLineSequence.length - 1; match && j >= 0; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match = k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }

        /**
         * Reads the next line of this part, scanning backwards.
         *
         * @return the line, or null if this part holds no further complete line
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            // part number 1 is the start of the file and therefore read last
            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Handles block rollover: creates the part that precedes this one in the
         * file and hands the left over bytes to it.
         *
         * @return the new FilePart or null if this was the first block of the file
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, encoding));
            }
            return null;
        }
    }

    private static final String EMPTY_STRING = "";
    private static final int DEFAULT_BLOCK_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /**
     * Tests whether the given charset equals one of the named charsets.
     * <p>
     * Names that the running JVM does not support are skipped instead of being
     * passed to {@link Charset#forName(String)}, which would throw for them.
     * </p>
     *
     * @param charset      the charset to test
     * @param charsetNames candidate charset names
     * @return true if {@code charset} equals one of the supported candidates
     */
    private static boolean isOneOf(final Charset charset, final String... charsetNames) {
        for (final String charsetName : charsetNames) {
            if (Charset.isSupported(charsetName) && Charset.forName(charsetName).equals(charset)) {
                return true;
            }
        }
        return false;
    }

    private final int blockSize;
    private final Charset encoding;
    private final SeekableByteChannel channel;
    private final long totalByteLength;
    private final long totalBlockCount;
    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;
    private FilePart currentFilePart;
    private boolean trailingNewlineOfFileSkipped;

    /**
     * Creates a ReversedLinesFileReader with the default block size
     * ({@link IOUtils#DEFAULT_BUFFER_SIZE}) and the platform's default encoding.
     *
     * @param file the file to be read
     * @throws IOException if an I/O error occurs
     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Creates a ReversedLinesFileReader with the default block size
     * ({@link IOUtils#DEFAULT_BUFFER_SIZE}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs
     * @since 2.5
     */
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException              if an I/O error occurs
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     * @since 2.3
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws IllegalArgumentException                     if {@code blockSize} is not positive
     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
     *                                                      {@link UnsupportedEncodingException}
     *                                                      in version 2.2 if the
     *                                                      encoding is not
     *                                                      supported.
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }

    /**
     * Creates a ReversedLinesFileReader with the default block size
     * ({@link IOUtils#DEFAULT_BUFFER_SIZE}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs
     * @since 2.7
     */
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * If anything fails after the file has been opened, the underlying channel is
     * closed before the exception is propagated.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException                  if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding cannot be read reversely by this class
     * @throws IllegalArgumentException     if {@code blockSize} is not positive
     * @since 2.7
     */
    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
        if (blockSize <= 0) {
            // fail early and clearly instead of dividing by zero after the file has been opened
            throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
        }
        this.blockSize = blockSize;
        this.encoding = Charsets.toCharset(charset);

        // --- check & prepare encoding ---
        final CharsetEncoder charsetEncoder = this.encoding.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (StandardCharsets.UTF_8.equals(this.encoding)) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte
            // can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            byteDecrement = 1;
        } else if (isOneOf(this.encoding,
                "Shift_JIS", // Same as for UTF-8
                             // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
                "windows-31j", // Windows code page 932 (Japanese)
                "x-windows-949", // Windows code page 949 (Korean)
                "gbk", // Windows code page 936 (Simplified Chinese)
                "x-windows-950")) { // Windows code page 950 (Traditional Chinese)
            byteDecrement = 1;
        } else if (StandardCharsets.UTF_16BE.equals(this.encoding)
                || StandardCharsets.UTF_16LE.equals(this.encoding)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte
            // sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (StandardCharsets.UTF_16.equals(this.encoding)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
        } else {
            // report the resolved encoding; the charset argument may have been null
            throw new UnsupportedEncodingException(
                    "Encoding " + this.encoding + " is not supported yet (feel free to " + "submit a patch)");
        }

        // NOTE: The new line sequences are matched in the order given, so it is
        // important that \r\n is BEFORE \n
        this.newLineSequences = new byte[][] { "\r\n".getBytes(this.encoding), "\n".getBytes(this.encoding),
                "\r".getBytes(this.encoding) };

        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // Open file
        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
        try {
            this.totalByteLength = channel.size();
            int lastBlockLength = (int) (this.totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                this.totalBlockCount = this.totalByteLength / blockSize + 1;
            } else {
                this.totalBlockCount = this.totalByteLength / blockSize;
                if (this.totalByteLength > 0) {
                    lastBlockLength = blockSize;
                }
            }
            this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
        } catch (final IOException | RuntimeException e) {
            // the caller never receives an instance it could close, so release the channel here
            try {
                channel.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }

    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws IllegalArgumentException                     if {@code blockSize} is not positive
     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
     *                                                      {@link UnsupportedEncodingException}
     *                                                      in version 2.2 if the
     *                                                      encoding is not
     *                                                      supported.
     * @since 2.7
     */
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }

    /**
     * Returns the lines of the file from bottom to top.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    public String readLine() throws IOException {

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart != null) {
                line = currentFilePart.readLine();
            } else {
                // no more fileparts: we're done, leave line set to null
                break;
            }
        }

        // aligned behavior with BufferedReader that doesn't return a last, empty line
        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }

        return line;
    }

    /**
     * Returns {@code lineCount} lines of the file from bottom to top.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     * <p>
     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A new list
     * @throws IOException              if an I/O error occurs
     * @throws IllegalArgumentException if {@code lineCount} is negative
     * @since 2.8.0
     */
    public List<String> readLines(final int lineCount) throws IOException {
        if (lineCount < 0) {
            throw new IllegalArgumentException("lineCount < 0");
        }
        // Deliberately not presized with lineCount: callers may pass a very large
        // count to mean "everything", which must not allocate a huge backing array.
        final List<String> lines = new ArrayList<>();
        for (int i = 0; i < lineCount; i++) {
            final String line = readLine();
            if (line == null) {
                break; // start of file reached
            }
            lines.add(line);
        }
        return lines;
    }

    /**
     * Returns the last {@code lineCount} lines of the file, in file order, each
     * terminated by {@link System#lineSeparator()}.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A String; empty if no line could be read.
     * @throws IOException if an I/O error occurs
     * @since 2.8.0
     */
    public String toString(final int lineCount) throws IOException {
        final List<String> lines = readLines(lineCount);
        Collections.reverse(lines);
        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
    }

}