/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;

036/**
037 * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
038 * <p>
039 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
040 * </p>
041 *
042 * @since 2.2
043 */
044public class CharSequenceInputStream extends InputStream {
045
046    private static final int NO_MARK = -1;
047
048    private final CharsetEncoder charsetEncoder;
049    private final CharBuffer cBuf;
050    private final ByteBuffer bBuf;
051
052    private int cBufMark; // position in cBuf
053    private int bBufMark; // position in bBuf
054
055    /**
056     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
057     *
058     * @param cs the input character sequence.
059     * @param charset the character set name to use.
060     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
061     */
062    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
063        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
064    }
065
066    /**
067     * Constructs a new instance.
068     *
069     * @param cs the input character sequence.
070     * @param charset the character set name to use, null maps to the default Charset.
071     * @param bufferSize the buffer size to use.
072     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
073     */
074    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
075        // @formatter:off
076        this.charsetEncoder = Charsets.toCharset(charset).newEncoder()
077            .onMalformedInput(CodingErrorAction.REPLACE)
078            .onUnmappableCharacter(CodingErrorAction.REPLACE);
079        // @formatter:on
080        // Ensure that buffer is long enough to hold a complete character
081        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
082        this.bBuf.flip();
083        this.cBuf = CharBuffer.wrap(cs);
084        this.cBufMark = NO_MARK;
085        this.bBufMark = NO_MARK;
086    }
087
088    /**
089     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
090     *
091     * @param cs the input character sequence.
092     * @param charset the character set name to use.
093     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
094     */
095    public CharSequenceInputStream(final CharSequence cs, final String charset) {
096        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
097    }
098
099    /**
100     * Constructs a new instance.
101     *
102     * @param cs the input character sequence.
103     * @param charset the character set name to use, null maps to the default Charset.
104     * @param bufferSize the buffer size to use.
105     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
106     */
107    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
108        this(cs, Charsets.toCharset(charset), bufferSize);
109    }
110
111    /**
112     * Return an estimate of the number of bytes remaining in the byte stream.
113     * @return the count of bytes that can be read without blocking (or returning EOF).
114     *
115     * @throws IOException if an error occurs (probably not possible).
116     */
117    @Override
118    public int available() throws IOException {
119        // The cached entries are in bBuf; since encoding always creates at least one byte
120        // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
121        // Note that the previous implementation (2.4) could return zero even though there were
122        // encoded bytes still available.
123        return this.bBuf.remaining() + this.cBuf.remaining();
124    }
125
126    @Override
127    public void close() throws IOException {
128        // noop
129    }
130
131    /**
132     * Fills the byte output buffer from the input char buffer.
133     *
134     * @throws CharacterCodingException
135     *             an error encoding data.
136     */
137    private void fillBuffer() throws CharacterCodingException {
138        this.bBuf.compact();
139        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
140        if (result.isError()) {
141            result.throwException();
142        }
143        this.bBuf.flip();
144    }
145
146    /**
147     * Gets the CharsetEncoder.
148     *
149     * @return the CharsetEncoder.
150     */
151    CharsetEncoder getCharsetEncoder() {
152        return charsetEncoder;
153    }
154
155    /**
156     * {@inheritDoc}
157     * @param readlimit max read limit (ignored).
158     */
159    @Override
160    public synchronized void mark(final int readlimit) {
161        this.cBufMark = this.cBuf.position();
162        this.bBufMark = this.bBuf.position();
163        this.cBuf.mark();
164        this.bBuf.mark();
165        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
166        // however the bBuf is re-used so that won't work
167    }
168
169    @Override
170    public boolean markSupported() {
171        return true;
172    }
173
174    @Override
175    public int read() throws IOException {
176        for (;;) {
177            if (this.bBuf.hasRemaining()) {
178                return this.bBuf.get() & 0xFF;
179            }
180            fillBuffer();
181            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
182                return EOF;
183            }
184        }
185    }
186
187    @Override
188    public int read(final byte[] b) throws IOException {
189        return read(b, 0, b.length);
190    }
191
192    @Override
193    public int read(final byte[] array, int off, int len) throws IOException {
194        Objects.requireNonNull(array, "array");
195        if (len < 0 || off + len > array.length) {
196            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
197        }
198        if (len == 0) {
199            return 0; // must return 0 for zero length read
200        }
201        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
202            return EOF;
203        }
204        int bytesRead = 0;
205        while (len > 0) {
206            if (this.bBuf.hasRemaining()) {
207                final int chunk = Math.min(this.bBuf.remaining(), len);
208                this.bBuf.get(array, off, chunk);
209                off += chunk;
210                len -= chunk;
211                bytesRead += chunk;
212            } else {
213                fillBuffer();
214                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
215                    break;
216                }
217            }
218        }
219        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
220    }
221
222    @Override
223    public synchronized void reset() throws IOException {
224        //
225        // This is not the most efficient implementation, as it re-encodes from the beginning.
226        //
227        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
228        //
229        // It should be possible to apply some optimisations however:
230        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
231        // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
232        // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
233        // restart from there.
234        //
235        if (this.cBufMark != NO_MARK) {
236            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
237            if (this.cBuf.position() != 0) {
238                this.charsetEncoder.reset();
239                this.cBuf.rewind();
240                this.bBuf.rewind();
241                this.bBuf.limit(0); // rewind does not clear the buffer
242                while(this.cBuf.position() < this.cBufMark) {
243                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
244                    this.bBuf.limit(0);
245                    fillBuffer();
246                }
247            }
248            if (this.cBuf.position() != this.cBufMark) {
249                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
250                        "expected=" + this.cBufMark);
251            }
252            this.bBuf.position(this.bBufMark);
253            this.cBufMark = NO_MARK;
254            this.bBufMark = NO_MARK;
255        }
256    }
257
258    @Override
259    public long skip(long n) throws IOException {
260        //
261        // This could be made more efficient by using position to skip within the current buffer.
262        //
263        long skipped = 0;
264        while (n > 0 && available() > 0) {
265            this.read();
266            n--;
267            skipped++;
268        }
269        return skipped;
270    }
271
272}