BE THE CODER - org/apache/commons/io/input/ReaderInputStream.java


/*

 * Licensed to the Apache Software Foundation (ASF) under one or more

 * contributor license agreements.  See the NOTICE file distributed with

 * this work for additional information regarding copyright ownership.

 * The ASF licenses this file to You under the Apache License, Version 2.0

 * (the "License"); you may not use this file except in compliance with

 * the License.  You may obtain a copy of the License at

 * 

 *      http://www.apache.org/licenses/LICENSE-2.0

 * 

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */

package org.apache.commons.io.input;



import java.io.IOException;

import java.io.InputStream;

import java.io.Reader;

import java.nio.ByteBuffer;

import java.nio.CharBuffer;

import java.nio.charset.Charset;

import java.nio.charset.CharsetEncoder;

import java.nio.charset.CoderResult;

import java.nio.charset.CodingErrorAction;



/**

 * {@link InputStream} implementation that reads a character stream from a {@link Reader}

 * and transforms it to a byte stream using a specified charset encoding. The stream

 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset

 * encodings supported by the JRE are handled correctly. In particular for charsets such as

 * UTF-16, the implementation ensures that one and only one byte order marker

 * is produced.

 * <p>

 * Since in general it is not possible to predict the number of characters to be read from the

 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from

 * the {@link Reader} are buffered. There is therefore no well defined correlation

 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.

 * This also implies that in general there is no need to wrap the underlying {@link Reader}

 * in a {@link java.io.BufferedReader}.

 * <p>

 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};

 * in the following example, reading from <tt>in2</tt> would return the same byte

 * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal

 * with respect to the charset encoding):

 * <pre>

 * InputStream in = ...

 * Charset cs = ...

 * InputStreamReader reader = new InputStreamReader(in, cs);

 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>

 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},

 * except that the control flow is reversed: both classes transform a character stream

 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,

 * while {@link ReaderInputStream} pulls it from the underlying stream.

 * <p>

 * Note that while there are use cases where there is no alternative to using

 * this class, very often the need to use this class is an indication of a flaw

 * in the design of the code. This class is typically used in situations where an existing

 * API only accepts an {@link InputStream}, but where the most natural way to produce the data

 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation

 * where this problem may appear is when implementing the {@link javax.activation.DataSource}

 * interface from the Java Activation Framework.

 * <p>

 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next

 * read operation will block or not, it is not possible to provide a meaningful

 * implementation of the {@link InputStream#available()} method. A call to this method

 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.

 * <p>

 * Instances of {@link ReaderInputStream} are not thread safe.

 * 

 * @see org.apache.commons.io.output.WriterOutputStream

 * 

 * @author <a href="mailto:[email protected]">Andreas Veithen</a>

 * @since Commons IO 2.0

 */

public class ReaderInputStream extends InputStream {

    private static final int DEFAULT_BUFFER_SIZE = 1024;



    private final Reader reader;

    private final CharsetEncoder encoder;



    /**

     * CharBuffer used as input for the decoder. It should be reasonably

     * large as we read data from the underlying Reader into this buffer.

     */

    private final CharBuffer encoderIn;



    /**

     * ByteBuffer used as output for the decoder. This buffer can be small

     * as it is only used to transfer data from the decoder to the

     * buffer provided by the caller.

     */

    private final ByteBuffer encoderOut = ByteBuffer.allocate(128);



    private CoderResult lastCoderResult;

    private boolean endOfInput;



    /**

     * Construct a new {@link ReaderInputStream}.

     * 

     * @param reader the target {@link Reader}

     * @param encoder the charset encoder

     * @since Commons IO 2.1

     */

    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {

        this(reader, encoder, DEFAULT_BUFFER_SIZE);

    }



    /**

     * Construct a new {@link ReaderInputStream}.

     * 

     * @param reader the target {@link Reader}

     * @param encoder the charset encoder

     * @param bufferSize the size of the input buffer in number of characters

     * @since Commons IO 2.1

     */

    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {

        this.reader = reader;

        this.encoder = encoder;

        encoderIn = CharBuffer.allocate(bufferSize);

        encoderIn.flip();

    }



    /**

     * Construct a new {@link ReaderInputStream}.

     * 

     * @param reader the target {@link Reader}

     * @param charset the charset encoding

     * @param bufferSize the size of the input buffer in number of characters

     */

    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {

        this(reader,

             charset.newEncoder()

                    .onMalformedInput(CodingErrorAction.REPLACE)

                    .onUnmappableCharacter(CodingErrorAction.REPLACE),

             bufferSize);

    }



    /**

     * Construct a new {@link ReaderInputStream} with a default input buffer size of

     * 1024 characters.

     * 

     * @param reader the target {@link Reader}

     * @param charset the charset encoding

     */

    public ReaderInputStream(Reader reader, Charset charset) {

        this(reader, charset, DEFAULT_BUFFER_SIZE);

    }



    /**

     * Construct a new {@link ReaderInputStream}.

     * 

     * @param reader the target {@link Reader}

     * @param charsetName the name of the charset encoding

     * @param bufferSize the size of the input buffer in number of characters

     */

    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {

        this(reader, Charset.forName(charsetName), bufferSize);

    }



    /**

     * Construct a new {@link ReaderInputStream} with a default input buffer size of

     * 1024 characters.

     * 

     * @param reader the target {@link Reader}

     * @param charsetName the name of the charset encoding

     */

    public ReaderInputStream(Reader reader, String charsetName) {

        this(reader, charsetName, DEFAULT_BUFFER_SIZE);

    }



    /**

     * Construct a new {@link ReaderInputStream} that uses the default character encoding

     * with a default input buffer size of 1024 characters.

     * 

     * @param reader the target {@link Reader}

     */

    public ReaderInputStream(Reader reader) {

        this(reader, Charset.defaultCharset());

    }



    /**

     * Read the specified number of bytes into an array.

     * 

     * @param b the byte array to read into

     * @param off the offset to start reading bytes into

     * @param len the number of bytes to read

     * @return the number of bytes read or <code>-1</code>

     *         if the end of the stream has been reached

     * @throws IOException if an I/O error occurs

     */

    @Override

    public int read(byte[] b, int off, int len) throws IOException {

        int read = 0;

        while (len > 0) {

            if (encoderOut.position() > 0) {

                encoderOut.flip();

                int c = Math.min(encoderOut.remaining(), len);

                encoderOut.get(b, off, c);

                off += c;

                len -= c;

                read += c;

                encoderOut.compact();

            } else {

                if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {

                    encoderIn.compact();

                    int position = encoderIn.position();

                    // We don't use Reader#read(CharBuffer) here because it is more efficient

                    // to write directly to the underlying char array (the default implementation

                    // copies data to a temporary char array).

                    int c = reader.read(encoderIn.array(), position, encoderIn.remaining());

                    if (c == -1) {

                        endOfInput = true;

                    } else {

                        encoderIn.position(position+c);

                    }

                    encoderIn.flip();

                }

                lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);

                if (endOfInput && encoderOut.position() == 0) {

                    break;

                }

            }

        }

        return read == 0 && endOfInput ? -1 : read;

    }



    /**

     * Read the specified number of bytes into an array.

     * 

     * @param b the byte array to read into

     * @return the number of bytes read or <code>-1</code>

     *         if the end of the stream has been reached

     * @throws IOException if an I/O error occurs

     */

    @Override

    public int read(byte[] b) throws IOException {

        return read(b, 0, b.length);

    }



    /**

     * Read a single byte.

     *

     * @return either the byte read or <code>-1</code> if the end of the stream

     *         has been reached

     * @throws IOException if an I/O error occurs

     */

    @Override

    public int read() throws IOException {

        byte[] b = new byte[1];

        return read(b) == -1 ? -1 : b[0] & 0xFF;

    }



    /**

     * Close the stream. This method will cause the underlying {@link Reader}

     * to be closed.

     * @throws IOException if an I/O error occurs

     */

    @Override

    public void close() throws IOException {

        reader.close();

    }

}
Open Source Repository
Home	/commons-io/commons-io-2.1 \| Repository Home
Open Source Repository