From 34f404aa9e9bd039126e7e30388b2a8da3317ce5 Mon Sep 17 00:00:00 2001
From: tankers746
- * Every implementation of the Java platform is required to support the following character encodings. Consult the
- * release documentation for your implementation to see if any other encodings are supported. Consult the release
- * documentation for your implementation to see if any other encodings are supported.
- *
- *
- *
- * @see Standard charsets
- * @since 2.3
- * @version $Id: Charsets.java 1311751 2012-04-10 14:26:21Z ggregory $
- */
-public class Charsets {
- //
- // This class should only contain Charset instances for required encodings. This guarantees that it will load
- // correctly and without delay on all Java platforms.
- //
-
- /**
- * Returns the given Charset or the default Charset if the given Charset is null.
- *
- * @param charset
- * A charset or null.
- * @return the given Charset or the default Charset if the given Charset is null
- */
- public static Charset toCharset(Charset charset) {
- return charset == null ? Charset.defaultCharset() : charset;
- }
-
- /**
- * Returns a Charset for the named charset. If the name is null, return the default Charset.
- *
- * @param charset
- * The name of the requested charset, may be null.
- * @return a Charset for the named charset
- * @throws UnsupportedCharsetException
- * If the named charset is unavailable
- */
- public static Charset toCharset(String charset) {
- return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
- }
-
- /**
- * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. US-ASCII
- * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.ISO-8859-1
- * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.UTF-8
- * Eight-bit Unicode Transformation Format.UTF-16BE
- * Sixteen-bit Unicode Transformation Format, big-endian byte order.UTF-16LE
- * Sixteen-bit Unicode Transformation Format, little-endian byte order.UTF-16
- * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
- * accepted on input, big-endian used on output.)
- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); - - /** - *- * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. - *
- *- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset US_ASCII = Charset.forName("US-ASCII"); - - /** - *- * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark - * (either order accepted on input, big-endian used on output) - *
- *- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset UTF_16 = Charset.forName("UTF-16"); - - /** - *- * Sixteen-bit Unicode Transformation Format, big-endian byte order. - *
- *- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset UTF_16BE = Charset.forName("UTF-16BE"); - - /** - *- * Sixteen-bit Unicode Transformation Format, little-endian byte order. - *
- *- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset UTF_16LE = Charset.forName("UTF-16LE"); - - /** - *- * Eight-bit Unicode Transformation Format. - *
- *- * Every implementation of the Java platform is required to support this character encoding. - *
- * - * @see Standard charsets - */ - public static final Charset UTF_8 = Charset.forName("UTF-8"); -} \ No newline at end of file diff --git a/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java b/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java deleted file mode 100644 index ad66b5c3..00000000 --- a/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.io.input; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.io.UnsupportedEncodingException; -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.UnsupportedCharsetException; - -import org.apache.commons.io.Charsets; - -/** - * Reads lines in a file reversely (similar to a BufferedReader, but starting at - * the last line). Useful for e.g. searching in log files. - * - * @since 2.2 - */ -public class ReversedLinesFileReader implements Closeable { - - private final int blockSize; - private final Charset encoding; - - private final RandomAccessFile randomAccessFile; - - private final long totalByteLength; - private final long totalBlockCount; - - private final byte[][] newLineSequences; - private final int avoidNewlineSplitBufferSize; - private final int byteDecrement; - - private FilePart currentFilePart; - - private boolean trailingNewlineOfFileSkipped = false; - - /** - * Creates a ReversedLinesFileReader with default block size of 4KB and the - * platform's default encoding. - * - * @param file - * the file to be read - * @throws IOException if an I/O error occurs - */ - public ReversedLinesFileReader(final File file) throws IOException { - this(file, 4096, Charset.defaultCharset().toString()); - } - - /** - * Creates a ReversedLinesFileReader with the given block size and encoding. - * - * @param file - * the file to be read - * @param blockSize - * size of the internal buffer (for ideal performance this should - * match with the block size of the underlying file system). - * @param encoding - * the encoding of the file - * @throws IOException if an I/O error occurs - * @since 2.3 - */ - public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { - this.blockSize = blockSize; - this.encoding = encoding; - - randomAccessFile = new RandomAccessFile(file, "r"); - totalByteLength = randomAccessFile.length(); - int lastBlockLength = (int) (totalByteLength % blockSize); - if (lastBlockLength > 0) { - totalBlockCount = totalByteLength / blockSize + 1; - } else { - totalBlockCount = totalByteLength / blockSize; - if (totalByteLength > 0) { - lastBlockLength = blockSize; - } - } - currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); - - // --- check & prepare encoding --- - Charset charset = Charsets.toCharset(encoding); - CharsetEncoder charsetEncoder = charset.newEncoder(); - float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); - if(maxBytesPerChar==1f) { - // all one byte encodings are no problem - byteDecrement = 1; - } else if(charset == Charset.forName("UTF-8")) { - // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte - // http://en.wikipedia.org/wiki/UTF-8 - byteDecrement = 1; - } else if(charset == Charset.forName("Shift_JIS")) { - // Same as for UTF-8 - // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html - byteDecrement = 1; - } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) { - // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, - // however byte order has to be specified - byteDecrement = 2; - } else if(charset == Charset.forName("UTF-16")) { - throw new UnsupportedEncodingException( - "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)"); - } else { - throw new UnsupportedEncodingException( - "Encoding "+encoding+" is not supported yet (feel free to submit a patch)"); - } - // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n - newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; - - avoidNewlineSplitBufferSize = newLineSequences[0].length; - } - - /** - * Creates a ReversedLinesFileReader with the given block size and encoding. - * - * @param file - * the file to be read - * @param blockSize - * size of the internal buffer (for ideal performance this should - * match with the block size of the underlying file system). - * @param encoding - * the encoding of the file - * @throws IOException if an I/O error occurs - * @throws UnsupportedCharsetException - * thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not - * supported. - */ - public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException { - this(file, blockSize, Charsets.toCharset(encoding)); - } - - /** - * Returns the lines of the file from bottom to top. - * - * @return the next line or null if the start of the file is reached - * @throws IOException if an I/O error occurs - */ - public String readLine() throws IOException { - - String line = currentFilePart.readLine(); - while (line == null) { - currentFilePart = currentFilePart.rollOver(); - if (currentFilePart != null) { - line = currentFilePart.readLine(); - } else { - // no more fileparts: we're done, leave line set to null - break; - } - } - - // aligned behaviour wiht BufferedReader that doesn't return a last, emtpy line - if("".equals(line) && !trailingNewlineOfFileSkipped) { - trailingNewlineOfFileSkipped = true; - line = readLine(); - } - - return line; - } - - /** - * Closes underlying resources. - * - * @throws IOException if an I/O error occurs - */ - public void close() throws IOException { - randomAccessFile.close(); - } - - private class FilePart { - private final long no; - - private final byte[] data; - - private byte[] leftOver; - - private int currentLastBytePos; - - /** - * ctor - * @param no the part number - * @param length its length - * @param leftOverOfLastFilePart remainder - * @throws IOException if there is a problem reading the file - */ - private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { - this.no = no; - int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); - this.data = new byte[dataLength]; - final long off = (no - 1) * blockSize; - - // read data - if (no > 0 /* file not empty */) { - randomAccessFile.seek(off); - final int countRead = randomAccessFile.read(data, 0, length); - if (countRead != length) { - throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); - } - } - // copy left over part into data arr - if (leftOverOfLastFilePart != null) { - System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); - } - this.currentLastBytePos = data.length - 1; - this.leftOver = null; - } - - /** - * Handles block rollover - * - * @return the new FilePart or null - * @throws IOException if there was a problem reading the file - */ - private FilePart rollOver() throws IOException { - - if (currentLastBytePos > -1) { - throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " - + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); - } - - if (no > 1) { - return new FilePart(no - 1, blockSize, leftOver); - } else { - // NO 1 was the last FilePart, we're finished - if (leftOver != null) { - throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" - + new String(leftOver, encoding)); - } - return null; - } - } - - /** - * Reads a line. - * - * @return the line or null - * @throws IOException if there is an error reading from the file - */ - private String readLine() throws IOException { - - String line = null; - int newLineMatchByteCount; - - boolean isLastFilePart = no == 1; - - int i = currentLastBytePos; - while (i > -1) { - - if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { - // avoidNewlineSplitBuffer: for all except the last file part we - // take a few bytes to the next file part to avoid splitting of newlines - createLeftOver(); - break; // skip last few bytes and leave it to the next file part - } - - // --- check for newline --- - if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { - final int lineStart = i + 1; - int lineLengthBytes = currentLastBytePos - lineStart + 1; - - if (lineLengthBytes < 0) { - throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes); - } - byte[] lineData = new byte[lineLengthBytes]; - System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes); - - line = new String(lineData, encoding); - - currentLastBytePos = i - newLineMatchByteCount; - break; // found line - } - - // --- move cursor --- - i -= byteDecrement; - - // --- end of file part handling --- - if (i < 0) { - createLeftOver(); - break; // end of file part - } - } - - // --- last file part handling --- - if (isLastFilePart && leftOver != null) { - // there will be no line break anymore, this is the first line of the file - line = new String(leftOver, encoding); - leftOver = null; - } - - return line; - } - - /** - * Creates the buffer containing any left over bytes. - */ - private void createLeftOver() { - int lineLengthBytes = currentLastBytePos + 1; - if (lineLengthBytes > 0) { - // create left over for next block - leftOver = new byte[lineLengthBytes]; - System.arraycopy(data, 0, leftOver, 0, lineLengthBytes); - } else { - leftOver = null; - } - currentLastBytePos = -1; - } - - /** - * Finds the new-line sequence and return its length. - * - * @param data buffer to scan - * @param i start offset in buffer - * @return length of newline sequence or 0 if none found - */ - private int getNewLineMatchByteCount(byte[] data, int i) { - for (byte[] newLineSequence : newLineSequences) { - boolean match = true; - for (int j = newLineSequence.length - 1; j >= 0; j--) { - int k = i + j - (newLineSequence.length - 1); - match &= k >= 0 && data[k] == newLineSequence[j]; - } - if (match) { - return newLineSequence.length; - } - } - return 0; - } - } - -} \ No newline at end of file