/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.Locale;
030import java.util.Objects;
031import java.util.regex.Matcher;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.input.XmlStreamReader;
037
038/**
039 * Character stream that handles all the necessary work to figure out the
040 * charset encoding of the XML document written to the stream.
041 *
042 * @see XmlStreamReader
043 * @since 2.0
044 */
045public class XmlStreamWriter extends Writer {
046
047    /**
048     * Builds a new {@link XmlStreamWriter} instance.
049     * <p>
050     * For example:
051     * </p>
052     * <pre>{@code
053     * WriterOutputStream w = WriterOutputStream.builder()
054     *   .setPath(path)
055     *   .setCharset(StandardCharsets.UTF_8)
056     *   .get()}
057     * </pre>
058     * <p>
059     * @since 2.12.0
060     */
061    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
062
063        public Builder() {
064            setCharsetDefault(StandardCharsets.UTF_8);
065            setCharset(StandardCharsets.UTF_8);
066        }
067
068        /**
069         * Constructs a new instance.
070         *
071         * @throws UnsupportedOperationException if the origin cannot be converted to an OutputStream.
072         */
073        @SuppressWarnings("resource")
074        @Override
075        public XmlStreamWriter get() throws IOException {
076            return new XmlStreamWriter(getOrigin().getOutputStream(), getCharset());
077        }
078
079    }
080
081    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
082
083    /**
084     * Constructs a new {@link Builder}.
085     *
086     * @return a new {@link Builder}.
087     * @since 2.12.0
088     */
089    public static Builder builder() {
090        return new Builder();
091    }
092
093    private final OutputStream out;
094
095    private final Charset defaultCharset;
096
097    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
098
099    private Writer writer;
100
101    private Charset charset;
102
103    /**
104     * Constructs a new XML stream writer for the specified file
105     * with a default encoding of UTF-8.
106     *
107     * @param file The file to write to
108     * @throws FileNotFoundException if there is an error creating or
109     * opening the file
110     * @deprecated Use {@link #builder()}
111     */
112    @Deprecated
113    public XmlStreamWriter(final File file) throws FileNotFoundException {
114        this(file, null);
115    }
116
117    /**
118     * Constructs a new XML stream writer for the specified file
119     * with the specified default encoding.
120     *
121     * @param file The file to write to
122     * @param defaultEncoding The default encoding if not encoding could be detected
123     * @throws FileNotFoundException if there is an error creating or
124     * opening the file
125     * @deprecated Use {@link #builder()}
126     */
127    @Deprecated
128    @SuppressWarnings("resource")
129    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
130        this(new FileOutputStream(file), defaultEncoding);
131    }
132
133    /**
134     * Constructs a new XML stream writer for the specified output stream
135     * with a default encoding of UTF-8.
136     *
137     * @param out The output stream
138     * @deprecated Use {@link #builder()}
139     */
140    @Deprecated
141    public XmlStreamWriter(final OutputStream out) {
142        this(out, StandardCharsets.UTF_8);
143    }
144
145    /**
146     * Constructs a new XML stream writer for the specified output stream
147     * with the specified default encoding.
148     *
149     * @param out The output stream
150     * @param defaultEncoding The default encoding if not encoding could be detected
151     */
152    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
153        this.out = out;
154        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
155    }
156
157    /**
158     * Constructs a new XML stream writer for the specified output stream
159     * with the specified default encoding.
160     *
161     * @param out The output stream
162     * @param defaultEncoding The default encoding if not encoding could be detected
163     * @deprecated Use {@link #builder()}
164     */
165    @Deprecated
166    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
167        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
168    }
169
170    /**
171     * Closes the underlying writer.
172     *
173     * @throws IOException if an error occurs closing the underlying writer
174     */
175    @Override
176    public void close() throws IOException {
177        if (writer == null) {
178            charset = defaultCharset;
179            writer = new OutputStreamWriter(out, charset);
180            writer.write(prologWriter.toString());
181        }
182        writer.close();
183    }
184
185    /**
186     * Detects the encoding.
187     *
188     * @param cbuf the buffer to write the characters from
189     * @param off The start offset
190     * @param len The number of characters to write
191     * @throws IOException if an error occurs detecting the encoding
192     */
193    private void detectEncoding(final char[] cbuf, final int off, final int len)
194            throws IOException {
195        int size = len;
196        final StringBuffer xmlProlog = prologWriter.getBuffer();
197        if (xmlProlog.length() + len > BUFFER_SIZE) {
198            size = BUFFER_SIZE - xmlProlog.length();
199        }
200        prologWriter.write(cbuf, off, size);
201
202        // try to determine encoding
203        if (xmlProlog.length() >= 5) {
204            if (xmlProlog.substring(0, 5).equals("<?xml")) {
205                // try to extract encoding from XML prolog
206                final int xmlPrologEnd = xmlProlog.indexOf("?>");
207                if (xmlPrologEnd > 0) {
208                    // ok, full XML prolog written: let's extract encoding
209                    final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
210                            xmlPrologEnd));
211                    if (m.find()) {
212                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
213                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
214                    } else {
215                        // no encoding found in XML prolog: using default
216                        // encoding
217                        charset = defaultCharset;
218                    }
219                } else if (xmlProlog.length() >= BUFFER_SIZE) {
220                    // no encoding found in first characters: using default
221                    // encoding
222                    charset = defaultCharset;
223                }
224            } else {
225                // no XML prolog: using default encoding
226                charset = defaultCharset;
227            }
228            if (charset != null) {
229                // encoding has been chosen: let's do it
230                prologWriter = null;
231                writer = new OutputStreamWriter(out, charset);
232                writer.write(xmlProlog.toString());
233                if (len > size) {
234                    writer.write(cbuf, off + size, len - size);
235                }
236            }
237        }
238    }
239
240    /**
241     * Flushes the underlying writer.
242     *
243     * @throws IOException if an error occurs flushing the underlying writer
244     */
245    @Override
246    public void flush() throws IOException {
247        if (writer != null) {
248            writer.flush();
249        }
250    }
251
252    /**
253     * Returns the default encoding.
254     *
255     * @return the default encoding
256     */
257    public String getDefaultEncoding() {
258        return defaultCharset.name();
259    }
260
261    /**
262     * Returns the detected encoding.
263     *
264     * @return the detected encoding
265     */
266    public String getEncoding() {
267        return charset.name();
268    }
269
270    /**
271     * Writes the characters to the underlying writer, detecting encoding.
272     *
273     * @param cbuf the buffer to write the characters from
274     * @param off The start offset
275     * @param len The number of characters to write
276     * @throws IOException if an error occurs detecting the encoding
277     */
278    @Override
279    public void write(final char[] cbuf, final int off, final int len) throws IOException {
280        if (prologWriter != null) {
281            detectEncoding(cbuf, off, len);
282        } else {
283            writer.write(cbuf, off, len);
284        }
285    }
286}