001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.net; 019 020import java.io.ByteArrayOutputStream; 021import java.io.UnsupportedEncodingException; 022import java.util.BitSet; 023 024import org.apache.commons.codec.BinaryDecoder; 025import org.apache.commons.codec.BinaryEncoder; 026import org.apache.commons.codec.CharEncoding; 027import org.apache.commons.codec.DecoderException; 028import org.apache.commons.codec.EncoderException; 029import org.apache.commons.codec.StringDecoder; 030import org.apache.commons.codec.StringEncoder; 031import org.apache.commons.codec.binary.StringUtils; 032 033/** 034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. 035 * <p> 036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and 037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below 038 * 1.4 rely on the platform's default charset encoding. 039 * </p> 040 * <p> 041 * This class is thread-safe as of 1.11 042 * </p> 043 * 044 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> 045 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> 046 * 047 * @since 1.2 048 */ 049public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 050 051 /** 052 * The default charset used for string decoding and encoding. 053 * 054 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) 055 */ 056 @Deprecated 057 protected volatile String charset; // added volatile: see CODEC-232 058 059 /** 060 * Release 1.5 made this field final. 061 */ 062 protected static final byte ESCAPE_CHAR = '%'; 063 064 /** 065 * BitSet of www-form-url safe characters. 066 * This is a copy of the internal BitSet which is now used for the conversion. 067 * Changes to this field are ignored. 068 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230) 069 */ 070 @Deprecated 071 protected static final BitSet WWW_FORM_URL; 072 073 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); 074 075 // Static initializer for www_form_url 076 static { 077 // alpha characters 078 for (int i = 'a'; i <= 'z'; i++) { 079 WWW_FORM_URL_SAFE.set(i); 080 } 081 for (int i = 'A'; i <= 'Z'; i++) { 082 WWW_FORM_URL_SAFE.set(i); 083 } 084 // numeric characters 085 for (int i = '0'; i <= '9'; i++) { 086 WWW_FORM_URL_SAFE.set(i); 087 } 088 // special chars 089 WWW_FORM_URL_SAFE.set('-'); 090 WWW_FORM_URL_SAFE.set('_'); 091 WWW_FORM_URL_SAFE.set('.'); 092 WWW_FORM_URL_SAFE.set('*'); 093 // blank to be replaced with + 094 WWW_FORM_URL_SAFE.set(' '); 095 096 // Create a copy in case anyone (ab)uses it 097 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone(); 098 } 099 100 101 /** 102 * Default constructor. 103 */ 104 public URLCodec() { 105 this(CharEncoding.UTF_8); 106 } 107 108 /** 109 * Constructor which allows for the selection of a default charset. 110 * 111 * @param charset the default string charset to use. 112 */ 113 public URLCodec(final String charset) { 114 super(); 115 this.charset = charset; 116 } 117 118 /** 119 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 120 * 121 * @param urlsafe 122 * bitset of characters deemed URL safe 123 * @param bytes 124 * array of bytes to convert to URL safe characters 125 * @return array of bytes containing URL safe characters 126 */ 127 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { 128 if (bytes == null) { 129 return null; 130 } 131 if (urlsafe == null) { 132 urlsafe = WWW_FORM_URL_SAFE; 133 } 134 135 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 136 for (final byte c : bytes) { 137 int b = c; 138 if (b < 0) { 139 b = 256 + b; 140 } 141 if (urlsafe.get(b)) { 142 if (b == ' ') { 143 b = '+'; 144 } 145 buffer.write(b); 146 } else { 147 buffer.write(ESCAPE_CHAR); 148 final char hex1 = Utils.hexDigit(b >> 4); 149 final char hex2 = Utils.hexDigit(b); 150 buffer.write(hex1); 151 buffer.write(hex2); 152 } 153 } 154 return buffer.toByteArray(); 155 } 156 157 /** 158 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 159 * back to their original representation. 160 * 161 * @param bytes 162 * array of URL safe characters 163 * @return array of original bytes 164 * @throws DecoderException 165 * Thrown if URL decoding is unsuccessful 166 */ 167 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { 168 if (bytes == null) { 169 return null; 170 } 171 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 172 for (int i = 0; i < bytes.length; i++) { 173 final int b = bytes[i]; 174 if (b == '+') { 175 buffer.write(' '); 176 } else if (b == ESCAPE_CHAR) { 177 try { 178 final int u = Utils.digit16(bytes[++i]); 179 final int l = Utils.digit16(bytes[++i]); 180 buffer.write((char) ((u << 4) + l)); 181 } catch (final ArrayIndexOutOfBoundsException e) { 182 throw new DecoderException("Invalid URL encoding: ", e); 183 } 184 } else { 185 buffer.write(b); 186 } 187 } 188 return buffer.toByteArray(); 189 } 190 191 /** 192 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 193 * 194 * @param bytes 195 * array of bytes to convert to URL safe characters 196 * @return array of bytes containing URL safe characters 197 */ 198 @Override 199 public byte[] encode(final byte[] bytes) { 200 return encodeUrl(WWW_FORM_URL_SAFE, bytes); 201 } 202 203 204 /** 205 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 206 * back to their original representation. 207 * 208 * @param bytes 209 * array of URL safe characters 210 * @return array of original bytes 211 * @throws DecoderException 212 * Thrown if URL decoding is unsuccessful 213 */ 214 @Override 215 public byte[] decode(final byte[] bytes) throws DecoderException { 216 return decodeUrl(bytes); 217 } 218 219 /** 220 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 221 * 222 * @param str 223 * string to convert to a URL safe form 224 * @param charsetName 225 * the charset for str 226 * @return URL safe string 227 * @throws UnsupportedEncodingException 228 * Thrown if charset is not supported 229 */ 230 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException { 231 if (str == null) { 232 return null; 233 } 234 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName))); 235 } 236 237 /** 238 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. 239 * 240 * @param str 241 * string to convert to a URL safe form 242 * @return URL safe string 243 * @throws EncoderException 244 * Thrown if URL encoding is unsuccessful 245 * 246 * @see #getDefaultCharset() 247 */ 248 @Override 249 public String encode(final String str) throws EncoderException { 250 if (str == null) { 251 return null; 252 } 253 try { 254 return encode(str, getDefaultCharset()); 255 } catch (final UnsupportedEncodingException e) { 256 throw new EncoderException(e.getMessage(), e); 257 } 258 } 259 260 261 /** 262 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted 263 * back to their original representation. 264 * 265 * @param str 266 * URL safe string to convert into its original form 267 * @param charsetName 268 * the original string charset 269 * @return original string 270 * @throws DecoderException 271 * Thrown if URL decoding is unsuccessful 272 * @throws UnsupportedEncodingException 273 * Thrown if charset is not supported 274 */ 275 public String decode(final String str, final String charsetName) 276 throws DecoderException, UnsupportedEncodingException { 277 if (str == null) { 278 return null; 279 } 280 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName); 281 } 282 283 /** 284 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are 285 * converted back to their original representation. 286 * 287 * @param str 288 * URL safe string to convert into its original form 289 * @return original string 290 * @throws DecoderException 291 * Thrown if URL decoding is unsuccessful 292 * @see #getDefaultCharset() 293 */ 294 @Override 295 public String decode(final String str) throws DecoderException { 296 if (str == null) { 297 return null; 298 } 299 try { 300 return decode(str, getDefaultCharset()); 301 } catch (final UnsupportedEncodingException e) { 302 throw new DecoderException(e.getMessage(), e); 303 } 304 } 305 306 /** 307 * Encodes an object into its URL safe form. Unsafe characters are escaped. 308 * 309 * @param obj 310 * string to convert to a URL safe form 311 * @return URL safe object 312 * @throws EncoderException 313 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful 314 */ 315 @Override 316 public Object encode(final Object obj) throws EncoderException { 317 if (obj == null) { 318 return null; 319 } else if (obj instanceof byte[]) { 320 return encode((byte[])obj); 321 } else if (obj instanceof String) { 322 return encode((String)obj); 323 } else { 324 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); 325 326 } 327 } 328 329 /** 330 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 331 * representation. 332 * 333 * @param obj 334 * URL safe object to convert into its original form 335 * @return original object 336 * @throws DecoderException 337 * Thrown if the argument is not a {@code String} or {@code byte[]}. Thrown if a failure 338 * condition is encountered during the decode process. 339 */ 340 @Override 341 public Object decode(final Object obj) throws DecoderException { 342 if (obj == null) { 343 return null; 344 } else if (obj instanceof byte[]) { 345 return decode((byte[]) obj); 346 } else if (obj instanceof String) { 347 return decode((String) obj); 348 } else { 349 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); 350 351 } 352 } 353 354 /** 355 * The default charset used for string decoding and encoding. 356 * 357 * @return the default string charset. 358 */ 359 public String getDefaultCharset() { 360 return this.charset; 361 } 362 363 /** 364 * The {@code String} encoding used for decoding and encoding. 365 * 366 * @return Returns the encoding. 367 * 368 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 369 */ 370 @Deprecated 371 public String getEncoding() { 372 return this.charset; 373 } 374 375}