001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.Reader;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.Charset;
027import java.nio.charset.CharsetEncoder;
028import java.nio.charset.CoderResult;
029import java.nio.charset.CodingErrorAction;
030import java.util.Objects;
031
032/**
033 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
034 * and transforms it to a byte stream using a specified charset encoding. The stream
035 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
036 * encodings supported by the JRE are handled correctly. In particular for charsets such as
037 * UTF-16, the implementation ensures that one and only one byte order marker
038 * is produced.
039 * <p>
040 * Since in general it is not possible to predict the number of characters to be read from the
041 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
042 * the {@link Reader} are buffered. There is therefore no well defined correlation
043 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
044 * This also implies that in general there is no need to wrap the underlying {@link Reader}
045 * in a {@link java.io.BufferedReader}.
046 * <p>
047 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
048 * in the following example, reading from {@code in2} would return the same byte
049 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
050 * with respect to the charset encoding):
051 * <pre>
052 * InputStream inputStream = ...
053 * Charset cs = ...
054 * InputStreamReader reader = new InputStreamReader(inputStream, cs);
055 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
056 *
057 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
058 * except that the control flow is reversed: both classes transform a character stream
059 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
060 * while {@link ReaderInputStream} pulls it from the underlying stream.
061 * <p>
062 * Note that while there are use cases where there is no alternative to using
063 * this class, very often the need to use this class is an indication of a flaw
064 * in the design of the code. This class is typically used in situations where an existing
065 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
066 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
067 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
068 * interface from the Java Activation Framework.
069 * <p>
070 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
071 * read operation will block or not, it is not possible to provide a meaningful
072 * implementation of the {@link InputStream#available()} method. A call to this method
073 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
074 * </p>
075 * <p>
076 * Instances of {@link ReaderInputStream} are not thread safe.
077 * </p>
078 *
079 * @see org.apache.commons.io.output.WriterOutputStream
080 *
081 * @since 2.0
082 */
public class ReaderInputStream extends InputStream {
    /** Default capacity, in characters, of the buffer that is filled from the reader. */
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest usable capacity of the character buffer. The buffer must be able to hold a complete
     * surrogate pair: with room for only one character a pending high surrogate leaves no space to
     * read its low surrogate, and the stream could never make progress.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    /** Capacity, in bytes, of the buffer that receives the encoder output. */
    private static final int OUTPUT_BUFFER_SIZE = 128;

    /** Value used by {@link Reader} and {@link InputStream} to signal the end of the stream. */
    private static final int END_OF_STREAM = -1;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent encode or flush operation; {@code null} before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying reader has reported the end of its stream. */
    private boolean endOfInput;

    /** Set once the encoder has been flushed completely; no further encoding may happen afterwards. */
    private boolean encoderFlushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since 2.1
     */
    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters; values smaller than
     *        two are raised to two so that a surrogate pair always fits into the buffer
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     * @since 2.1
     */
    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
        if (bufferSize < 0) {
            throw new IllegalArgumentException("Negative buffer size: " + bufferSize);
        }
        this.reader = reader;
        this.encoder = encoder;
        this.encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        // Both buffers start out empty and in "read" mode.
        this.encoderIn.flip();
        this.encoderOut = ByteBuffer.allocate(OUTPUT_BUFFER_SIZE);
        this.encoderOut.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable characters
     * are replaced by the replacement sequence of the charset.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(final Reader reader, final Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(final Reader reader, final String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @deprecated 2.5 use {@link #ReaderInputStream(Reader, Charset)} instead
     */
    @Deprecated
    public ReaderInputStream(final Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Fills the internal char buffer from the reader and encodes its content into the
     * internal byte buffer. Once the reader is exhausted the encoder is flushed so that
     * stateful charsets can emit their trailing bytes.
     *
     * @throws java.nio.charset.CharacterCodingException
     *             If the encoder reports malformed input or an unmappable character
     * @throws IOException
     *             If an I/O error occurs
     */
    private void fillBuffer() throws IOException {
        if (encoderFlushed) {
            // The encoder must not be asked to encode again after a completed flush.
            return;
        }
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            try {
                final int position = encoderIn.position();
                // We don't use Reader#read(CharBuffer) here because it is more efficient
                // to write directly to the underlying char array (the default implementation
                // copies data to a temporary char array).
                final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                if (c == END_OF_STREAM) {
                    endOfInput = true;
                } else {
                    encoderIn.position(position + c);
                }
            } finally {
                // Return to "read" mode even if the reader failed, so the buffer stays consistent.
                encoderIn.flip();
            }
        }
        encoderOut.compact();
        try {
            lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
            if (endOfInput && lastCoderResult.isUnderflow()) {
                // All characters are encoded: let the encoder write any pending trailing bytes.
                // An overflow here leaves the flush incomplete; it is retried on the next call.
                lastCoderResult = encoder.flush(encoderOut);
                encoderFlushed = lastCoderResult.isUnderflow();
            }
        } finally {
            encoderOut.flip();
        }
        if (lastCoderResult.isError()) {
            // The encoder does not advance past the offending input, so retrying silently
            // would loop forever; surface the problem to the caller instead.
            lastCoderResult.throwException();
        }
    }

    /**
     * Read the specified number of bytes into an array. This method keeps reading from the
     * underlying reader until {@code len} bytes are available or the end of the stream is reached.
     *
     * @param array the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or {@code -1}
     *         if the end of the stream has been reached
     * @throws NullPointerException if {@code array} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or if
     *         {@code len} is greater than {@code array.length - off}
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        Objects.requireNonNull(array, "array");
        // "len > array.length - off" cannot overflow, unlike "off + len > array.length".
        if (len < 0 || off < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length +
                    ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // Always return 0 if len == 0
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                final int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(array, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    break;
                }
            }
        }
        return read == 0 && endOfInput ? END_OF_STREAM : read;
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param b the byte array to read into
     * @return the number of bytes read or {@code -1}
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or {@code -1} if the end of the stream
     *         has been reached
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            }
            fillBuffer();
            if (endOfInput && !encoderOut.hasRemaining()) {
                return END_OF_STREAM;
            }
        }
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}