/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.input.XmlStreamReader;

/**
 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
 * <p>
 * Characters are buffered in memory until the encoding can be decided (from the XML prolog, or by falling back to the default encoding); only then is the
 * underlying {@link OutputStreamWriter} created and the buffered characters written out.
 * </p>
 * <p>
 * To build an instance, see {@link Builder}.
 * </p>
 *
 * @see XmlStreamReader
 * @since 2.0
 */
public class XmlStreamWriter extends Writer {

    /**
     * Builds a new {@link XmlStreamWriter} instance.
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * XmlStreamWriter w = XmlStreamWriter.builder()
     *   .setPath(path)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @since 2.12.0
     */
    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {

        /**
         * Constructs a new Builder whose charset and default charset are both UTF-8.
         */
        public Builder() {
            setCharsetDefault(StandardCharsets.UTF_8);
            setCharset(StandardCharsets.UTF_8);
        }

        /**
         * Constructs a new instance.
         * <p>
         * This builder uses the aspects OutputStream, OpenOption[], and Charset.
         * </p>
         * <p>
         * You must provide an origin that can be converted to an OutputStream by this builder, otherwise, this call will throw an
         * {@link UnsupportedOperationException}.
         * </p>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide an OutputStream.
         * @throws IOException                   if an I/O error occurs.
         * @see #getOutputStream()
         */
        @SuppressWarnings("resource")
        @Override
        public XmlStreamWriter get() throws IOException {
            return new XmlStreamWriter(getOutputStream(), getCharset());
        }

    }

    /** Maximum number of characters buffered while looking for the XML prolog. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /** The destination byte stream. */
    private final OutputStream out;

    /** The charset used when the XML prolog does not declare one. */
    private final Charset defaultCharset;

    /** Buffers the first characters written; {@code null} once the encoding has been decided. */
    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);

    /** The encoding writer; {@code null} until the encoding has been decided. */
    private Writer writer;

    /** The decided charset; {@code null} until the encoding has been decided. */
    private Charset charset;

    /**
     * Constructs a new XML stream writer for the specified file
     * with a default encoding of UTF-8.
     *
     * @param file The file to write to
     * @throws FileNotFoundException if there is an error creating or
     * opening the file
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final File file) throws FileNotFoundException {
        this(file, null);
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with the specified default encoding.
     *
     * @param file The file to write to
     * @param defaultEncoding The default encoding if no encoding could be detected
     * @throws FileNotFoundException if there is an error creating or
     * opening the file
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    @SuppressWarnings("resource")
    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
        this(new FileOutputStream(file), defaultEncoding);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with a default encoding of UTF-8.
     *
     * @param out The output stream
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final OutputStream out) {
        this(out, StandardCharsets.UTF_8);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be detected; must not be {@code null}
     */
    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
        this.out = out;
        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be detected; {@code null} selects UTF-8
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
    }

    /**
     * Closes the underlying writer.
     * <p>
     * If the encoding has not been decided yet, the default encoding is used and any characters buffered so far are written before closing.
     * </p>
     *
     * @throws IOException if an error occurs writing the buffered characters or closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        if (writer != null) {
            writer.close();
            return;
        }
        // Encoding was never decided: fall back to the default and emit whatever was buffered.
        charset = defaultCharset;
        final String pending = prologWriter.toString();
        // Drop the buffer so that writes after close reach the (closed) writer instead of restarting detection.
        prologWriter = null;
        writer = new OutputStreamWriter(out, charset);
        // try-with-resources guarantees the stream is closed even when writing the buffered text fails.
        try (Writer w = writer) {
            w.write(pending);
        }
    }

    /**
     * Detects the encoding.
     * <p>
     * Appends the given characters (up to the buffer limit) to the prolog buffer and, once at least five characters are available, decides the charset: the
     * one declared in a complete XML prolog, or the default when there is no prolog, no declared encoding, or the buffer fills up before the prolog ends. When
     * the charset is decided, the underlying writer is created and receives the buffered characters followed by any characters that did not fit the buffer.
     * </p>
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    private void detectEncoding(final char[] cbuf, final int off, final int len)
            throws IOException {
        int size = len;
        final StringBuffer xmlProlog = prologWriter.getBuffer();
        if (xmlProlog.length() + len > BUFFER_SIZE) {
            // Only buffer up to BUFFER_SIZE characters; the remainder is written directly below.
            size = BUFFER_SIZE - xmlProlog.length();
        }
        prologWriter.write(cbuf, off, size);

        // try to determine encoding
        if (xmlProlog.length() >= 5) {
            if (xmlProlog.substring(0, 5).equals("<?xml")) {
                // try to extract encoding from XML prolog
                final int xmlPrologEnd = xmlProlog.indexOf("?>");
                if (xmlPrologEnd > 0) {
                    // ok, full XML prolog written: let's extract encoding
                    final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
                            xmlPrologEnd));
                    if (m.find()) {
                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
                        // group(1) includes the surrounding quote characters: strip them
                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
                    } else {
                        // no encoding found in XML prolog: using default
                        // encoding
                        charset = defaultCharset;
                    }
                } else if (xmlProlog.length() >= BUFFER_SIZE) {
                    // no encoding found in first characters: using default
                    // encoding
                    charset = defaultCharset;
                }
            } else {
                // no XML prolog: using default encoding
                charset = defaultCharset;
            }
            if (charset != null) {
                // encoding has been chosen: let's do it
                prologWriter = null;
                writer = new OutputStreamWriter(out, charset);
                writer.write(xmlProlog.toString());
                if (len > size) {
                    writer.write(cbuf, off + size, len - size);
                }
            }
        }
    }

    /**
     * Flushes the underlying writer.
     * <p>
     * This is a no-op while the encoding has not been decided, since nothing has been written to the underlying stream yet.
     * </p>
     *
     * @throws IOException if an error occurs flushing the underlying writer
     */
    @Override
    public void flush() throws IOException {
        if (writer != null) {
            writer.flush();
        }
    }

    /**
     * Returns the default encoding.
     *
     * @return the default encoding
     */
    public String getDefaultEncoding() {
        return defaultCharset.name();
    }

    /**
     * Returns the detected encoding.
     *
     * @return the detected encoding, or {@code null} if the encoding has not been determined yet
     */
    public String getEncoding() {
        // The charset stays null until enough characters have been written (or the writer is closed).
        return charset != null ? charset.name() : null;
    }

    /**
     * Writes the characters to the underlying writer, detecting encoding.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        if (prologWriter != null) {
            detectEncoding(cbuf, off, len);
        } else {
            writer.write(cbuf, off, len);
        }
    }
}