/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetDecoders;

/**
 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
 * correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can
 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2}
 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
 * </p>
 *
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);
 * </pre>
 *
 * <p>
 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes
 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while
 * {@link WriterOutputStream} pushes it to the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
 * known to represent character data that must be decoded for further use.
 * </p>
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 * </p>
 *
 * @see org.apache.commons.io.input.ReaderInputStream
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {

    /**
     * Builds a new {@link WriterOutputStream} instance.
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * WriterOutputStream s = WriterOutputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(8192)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .setWriteImmediately(false)
     *   .get()}
     * </pre>
     *
     * @since 2.12.0
     */
    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

        private CharsetDecoder charsetDecoder;
        private boolean writeImmediately;

        /**
         * Constructs a new builder whose charset decoder is created from the builder's initial charset.
         */
        public Builder() {
            this.charsetDecoder = getCharset().newDecoder();
        }

        /**
         * Constructs a new instance from this builder's origin, charset, charset decoder, buffer size and write-immediately flag.
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot be converted to a Writer.
         * @throws IOException if an I/O error occurs.
         */
        @SuppressWarnings("resource")
        @Override
        public WriterOutputStream get() throws IOException {
            return new WriterOutputStream(getOrigin().getWriter(getCharset()), charsetDecoder, getBufferSize(), writeImmediately);
        }

        /**
         * Sets the charset and replaces the charset decoder with a new decoder of the resulting charset.
         *
         * @param charset the charset.
         * @return this
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Re-read the charset from the superclass: it is the value the builder actually retained.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset by name and replaces the charset decoder with a new decoder of the resulting charset.
         *
         * @param charset the charset name.
         * @return this
         */
        @Override
        public Builder setCharset(final String charset) {
            super.setCharset(charset);
            // Re-read the charset from the superclass: it is the value the builder actually retained.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset decoder. The builder's charset is updated to the charset of the decoder that ends up being used.
         *
         * @param charsetDecoder the charset decoder; if {@code null}, a new decoder of the builder's default charset is used.
         * @return this
         */
        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
            super.setCharset(this.charsetDecoder.charset());
            return this;
        }

        /**
         * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying
         * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
         * is called.
         *
         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
         *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
         *                         {@link #flush()} or {@link #close()} is called.
         * @return this
         */
        public Builder setWriteImmediately(final boolean writeImmediately) {
            this.writeImmediately = writeImmediately;
            return this;
        }

    }

    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /**
     * Message of the exception thrown when the running JDK fails the UTF-16 decoding self-test.
     */
    private static final String BROKEN_UTF16_MESSAGE = "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Checks if the JDK in use properly supports the given charset.
     * <p>
     * Only UTF-16 is tested: a short sample string is encoded and then decoded one byte at a time, which is how this class feeds its decoder. Any other
     * charset is accepted without testing.
     * </p>
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the incremental decoding self-test fails.
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] bytes = testString.getBytes(charset);

        final CharsetDecoder probeDecoder = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(testString.length());
        final int len = bytes.length;
        for (int i = 0; i < len; i++) {
            in.put(bytes[i]);
            in.flip();
            try {
                // The last byte is the only one decoded with endOfInput == true.
                probeDecoder.decode(in, out, i == len - 1);
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause so the root problem stays diagnosable.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        out.rewind();
        if (!testString.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }

    private final Writer writer;
    private final CharsetDecoder decoder;

    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     * <p>
     * The decoder created by this constructor replaces malformed input and unmappable characters with {@code "?"}.
     * </p>
     *
     * @param writer           the target {@link Writer}
     * @param charset          the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        // @formatter:off
        this(writer,
            Charsets.toCharset(charset).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
        // @formatter:on
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param decoder          the charset decoder
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK fails the UTF-16 decoding self-test.
     * @since 2.1
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
        // Resolve the decoder exactly once so that the instance that is validated is the instance that is used.
        final CharsetDecoder resolvedDecoder = CharsetDecoders.toCharsetDecoder(decoder);
        checkIbmJdkWithBrokenUTF16(resolvedDecoder.charset());
        this.writer = writer;
        this.decoder = resolvedDecoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called.
     * <p>
     * The underlying {@link Writer} is closed even if decoding or writing the remaining content fails; in that case an exception thrown by
     * {@link Writer#close()} is added as a suppressed exception to the original failure.
     * </p>
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        // try-with-resources guarantees that the writer is released on every path.
        try (Writer closing = writer) {
            processInput(true);
            flushOutput();
        }
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Flush the output: writes the characters accumulated in the output buffer, if any, to the underlying {@link Writer} and empties the buffer.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            // The buffer is never flipped, so its limit is still its capacity; resetting the position empties it.
            decoderOut.rewind();
        }
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer, writing the CharBuffer to the underlying {@link Writer} each time it overflows.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports malformed input or an unmappable character, or if the output buffer is too small
     *                     for the decoder to make any progress.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        try {
            CoderResult coderResult;
            while (true) {
                coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
                if (coderResult.isOverflow()) {
                    if (decoderOut.position() == 0) {
                        // The output buffer is already empty, so flushing cannot free any space: retrying would loop forever.
                        throw new IOException("Output buffer of " + decoderOut.capacity() + " characters is too small to decode the input");
                    }
                    flushOutput();
                } else if (coderResult.isUnderflow()) {
                    break;
                } else {
                    // Malformed input or unmappable character. This is only reachable with a decoder that reports
                    // such errors; decoders configured to replace them never produce this result.
                    throw new IOException("Unexpected coder result");
                }
            }
        } finally {
            // Discard the bytes that have been read. Done in a finally block so that decoderIn is always switched
            // back to write mode, even when decoding or flushing fails.
            decoderIn.compact();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b   the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        while (len > 0) {
            // Feed the decoder in chunks that fit into the free space of the small input buffer.
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }
}