001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.io.input; 019 020import static org.apache.commons.io.IOUtils.EOF; 021 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.ByteBuffer; 025import java.nio.CharBuffer; 026import java.nio.charset.CharacterCodingException; 027import java.nio.charset.Charset; 028import java.nio.charset.CharsetEncoder; 029import java.nio.charset.CoderResult; 030import java.nio.charset.CodingErrorAction; 031import java.util.Objects; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035 036/** 037 * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer. 038 * <p> 039 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}. 040 * </p> 041 * 042 * @since 2.2 043 */ 044public class CharSequenceInputStream extends InputStream { 045 046 private static final int NO_MARK = -1; 047 048 private final CharsetEncoder charsetEncoder; 049 private final CharBuffer cBuf; 050 private final ByteBuffer bBuf; 051 052 private int cBufMark; // position in cBuf 053 private int bBufMark; // position in bBuf 054 055 /** 056 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 057 * 058 * @param cs the input character sequence. 059 * @param charset the character set name to use. 060 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 061 */ 062 public CharSequenceInputStream(final CharSequence cs, final Charset charset) { 063 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 064 } 065 066 /** 067 * Constructs a new instance. 068 * 069 * @param cs the input character sequence. 070 * @param charset the character set name to use, null maps to the default Charset. 071 * @param bufferSize the buffer size to use. 072 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 073 */ 074 public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) { 075 // @formatter:off 076 this.charsetEncoder = Charsets.toCharset(charset).newEncoder() 077 .onMalformedInput(CodingErrorAction.REPLACE) 078 .onUnmappableCharacter(CodingErrorAction.REPLACE); 079 // @formatter:on 080 // Ensure that buffer is long enough to hold a complete character 081 this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize)); 082 this.bBuf.flip(); 083 this.cBuf = CharBuffer.wrap(cs); 084 this.cBufMark = NO_MARK; 085 this.bBufMark = NO_MARK; 086 } 087 088 /** 089 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 090 * 091 * @param cs the input character sequence. 092 * @param charset the character set name to use. 093 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 094 */ 095 public CharSequenceInputStream(final CharSequence cs, final String charset) { 096 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 097 } 098 099 /** 100 * Constructs a new instance. 101 * 102 * @param cs the input character sequence. 103 * @param charset the character set name to use, null maps to the default Charset. 104 * @param bufferSize the buffer size to use. 105 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 106 */ 107 public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) { 108 this(cs, Charsets.toCharset(charset), bufferSize); 109 } 110 111 /** 112 * Return an estimate of the number of bytes remaining in the byte stream. 113 * @return the count of bytes that can be read without blocking (or returning EOF). 114 * 115 * @throws IOException if an error occurs (probably not possible). 116 */ 117 @Override 118 public int available() throws IOException { 119 // The cached entries are in bBuf; since encoding always creates at least one byte 120 // per character, we can add the two to get a better estimate (e.g. if bBuf is empty) 121 // Note that the previous implementation (2.4) could return zero even though there were 122 // encoded bytes still available. 123 return this.bBuf.remaining() + this.cBuf.remaining(); 124 } 125 126 @Override 127 public void close() throws IOException { 128 // noop 129 } 130 131 /** 132 * Fills the byte output buffer from the input char buffer. 133 * 134 * @throws CharacterCodingException 135 * an error encoding data. 136 */ 137 private void fillBuffer() throws CharacterCodingException { 138 this.bBuf.compact(); 139 final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true); 140 if (result.isError()) { 141 result.throwException(); 142 } 143 this.bBuf.flip(); 144 } 145 146 /** 147 * Gets the CharsetEncoder. 148 * 149 * @return the CharsetEncoder. 150 */ 151 CharsetEncoder getCharsetEncoder() { 152 return charsetEncoder; 153 } 154 155 /** 156 * {@inheritDoc} 157 * @param readlimit max read limit (ignored). 158 */ 159 @Override 160 public synchronized void mark(final int readlimit) { 161 this.cBufMark = this.cBuf.position(); 162 this.bBufMark = this.bBuf.position(); 163 this.cBuf.mark(); 164 this.bBuf.mark(); 165 // It would be nice to be able to use mark & reset on the cBuf and bBuf; 166 // however the bBuf is re-used so that won't work 167 } 168 169 @Override 170 public boolean markSupported() { 171 return true; 172 } 173 174 @Override 175 public int read() throws IOException { 176 for (;;) { 177 if (this.bBuf.hasRemaining()) { 178 return this.bBuf.get() & 0xFF; 179 } 180 fillBuffer(); 181 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 182 return EOF; 183 } 184 } 185 } 186 187 @Override 188 public int read(final byte[] b) throws IOException { 189 return read(b, 0, b.length); 190 } 191 192 @Override 193 public int read(final byte[] array, int off, int len) throws IOException { 194 Objects.requireNonNull(array, "array"); 195 if (len < 0 || off + len > array.length) { 196 throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len); 197 } 198 if (len == 0) { 199 return 0; // must return 0 for zero length read 200 } 201 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 202 return EOF; 203 } 204 int bytesRead = 0; 205 while (len > 0) { 206 if (this.bBuf.hasRemaining()) { 207 final int chunk = Math.min(this.bBuf.remaining(), len); 208 this.bBuf.get(array, off, chunk); 209 off += chunk; 210 len -= chunk; 211 bytesRead += chunk; 212 } else { 213 fillBuffer(); 214 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 215 break; 216 } 217 } 218 } 219 return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead; 220 } 221 222 @Override 223 public synchronized void reset() throws IOException { 224 // 225 // This is not the most efficient implementation, as it re-encodes from the beginning. 226 // 227 // Since the bBuf is re-used, in general it's necessary to re-encode the data. 228 // 229 // It should be possible to apply some optimisations however: 230 // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since 231 // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is 232 // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to 233 // restart from there. 234 // 235 if (this.cBufMark != NO_MARK) { 236 // if cBuf is at 0, we have not started reading anything, so skip re-encoding 237 if (this.cBuf.position() != 0) { 238 this.charsetEncoder.reset(); 239 this.cBuf.rewind(); 240 this.bBuf.rewind(); 241 this.bBuf.limit(0); // rewind does not clear the buffer 242 while(this.cBuf.position() < this.cBufMark) { 243 this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing) 244 this.bBuf.limit(0); 245 fillBuffer(); 246 } 247 } 248 if (this.cBuf.position() != this.cBufMark) { 249 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " + 250 "expected=" + this.cBufMark); 251 } 252 this.bBuf.position(this.bBufMark); 253 this.cBufMark = NO_MARK; 254 this.bBufMark = NO_MARK; 255 } 256 } 257 258 @Override 259 public long skip(long n) throws IOException { 260 // 261 // This could be made more efficient by using position to skip within the current buffer. 262 // 263 long skipped = 0; 264 while (n > 0 && available() > 0) { 265 this.read(); 266 n--; 267 skipped++; 268 } 269 return skipped; 270 } 271 272}