001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.math.BigInteger;
021import java.util.Objects;
022
023import org.apache.commons.codec.CodecPolicy;
024
025/**
026 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
027 *
028 * <p>
029 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
030 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
031 * </p>
032 * <p>
033 * The class can be parameterized in the following manner with various constructors:
034 * </p>
035 * <ul>
036 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples
 * of 4 in the encoded data.</li>
039 * <li>Line separator: Default is CRLF ("\r\n")</li>
040 * </ul>
041 * <p>
042 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
043 * </p>
044 * <p>
045 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
046 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
047 * UTF-8, etc).
048 * </p>
049 * <p>
050 * This class is thread-safe.
051 * </p>
052 *
053 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
054 * @since 1.0
055 */
056public class Base64 extends BaseNCodec {
057
    /**
     * BASE64 characters are 6 bits in length.
     * They are formed by taking a block of 3 octets to form a 24-bit string,
     * which is converted into 4 BASE64 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 6;
    /** Number of raw (unencoded) octets in one block: 3 octets = 24 bits. */
    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
    /** Number of encoded characters produced from one block: 24 bits at 6 bits per character. */
    private static final int BYTES_PER_ENCODED_BLOCK = 4;
066
    /**
     * Lookup table that translates a 6-bit value (0-63, used as the array index) into the corresponding character
     * of the standard "Base64 Alphabet" as specified in Table 1 of RFC 2045. Indices 62 and 63 map to '+' and '/'.
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    private static final byte[] STANDARD_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };
081
    /**
     * URL-safe variant of the standard encode table: identical for indices 0-61, but indices 62 and 63 map to
     * '-' and '_' instead of '+' and '/'. The constructor selects this table for the encoder only when the
     * codec is created in URL-safe mode.
     */
    private static final byte[] URL_SAFE_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };
094
    /**
     * Lookup table that translates characters drawn from the "Base64 Alphabet" (as specified in Table 1 of
     * RFC 2045) into their 6-bit integer equivalents. The array index is the character code; characters that are
     * not in the Base64 alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * The table only covers codes 0x00 through 0x7a ('z'), so callers must range-check a byte
     * ({@code b >= 0 && b < DECODE_TABLE.length}) before indexing.
     * </p>
     * <p>
     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means the decoder seamlessly handles
     * both URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
     * </p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    private static final byte[] DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
    };
117
    // Base64 uses 6-bit fields; the masks below isolate the low bits of the bit work area.

    /** Mask used to extract 6 bits, used when encoding. */
    private static final int MASK_6BITS = 0x3f;
    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final int MASK_4BITS = 0xf;
    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final int MASK_2BITS = 0x3;
127
128    // The static final fields above are used for the original static byte[] methods on Base64.
129    // The private member fields below are used with the new streaming approach, which requires
130    // some state be preserved between calls of encode() and decode().
131
132    /**
133     * Decodes Base64 data into octets.
134     * <p>
135     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
136     * </p>
137     *
138     * @param base64Data
139     *            Byte array containing Base64 data
140     * @return Array containing decoded data.
141     */
142    public static byte[] decodeBase64(final byte[] base64Data) {
143        return new Base64().decode(base64Data);
144    }
145
146    /**
147     * Decodes a Base64 String into octets.
148     * <p>
149     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
150     * </p>
151     *
152     * @param base64String
153     *            String containing Base64 data
154     * @return Array containing decoded data.
155     * @since 1.4
156     */
157    public static byte[] decodeBase64(final String base64String) {
158        return new Base64().decode(base64String);
159    }
160
161    // Implementation of integer encoding used for crypto
162    /**
163     * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
164     *
165     * @param pArray
166     *            a byte array containing base64 character data
167     * @return A BigInteger
168     * @since 1.4
169     */
170    public static BigInteger decodeInteger(final byte[] pArray) {
171        return new BigInteger(1, decodeBase64(pArray));
172    }
173
174    /**
175     * Encodes binary data using the base64 algorithm but does not chunk the output.
176     *
177     * @param binaryData
178     *            binary data to encode
179     * @return byte[] containing Base64 characters in their UTF-8 representation.
180     */
181    public static byte[] encodeBase64(final byte[] binaryData) {
182        return encodeBase64(binaryData, false);
183    }
184
185    /**
186     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
187     *
188     * @param binaryData
189     *            Array containing binary data to encode.
190     * @param isChunked
191     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
192     * @return Base64-encoded data.
193     * @throws IllegalArgumentException
194     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
195     */
196    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
197        return encodeBase64(binaryData, isChunked, false);
198    }
199
200    /**
201     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
202     *
203     * @param binaryData
204     *            Array containing binary data to encode.
205     * @param isChunked
206     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
207     * @param urlSafe
208     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
209     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
210     * @return Base64-encoded data.
211     * @throws IllegalArgumentException
212     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
213     * @since 1.4
214     */
215    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
216        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
217    }
218
219    /**
220     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
221     *
222     * @param binaryData
223     *            Array containing binary data to encode.
224     * @param isChunked
225     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
226     * @param urlSafe
227     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
228     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
229     * @param maxResultSize
230     *            The maximum result size to accept.
231     * @return Base64-encoded data.
232     * @throws IllegalArgumentException
233     *             Thrown when the input array needs an output array bigger than maxResultSize
234     * @since 1.4
235     */
236    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
237                                      final boolean urlSafe, final int maxResultSize) {
238        if (binaryData == null || binaryData.length == 0) {
239            return binaryData;
240        }
241
242        // Create this so can use the super-class method
243        // Also ensures that the same roundings are performed by the ctor and the code
244        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
245        final long len = b64.getEncodedLength(binaryData);
246        if (len > maxResultSize) {
247            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
248                len +
249                ") than the specified maximum size of " +
250                maxResultSize);
251        }
252
253        return b64.encode(binaryData);
254    }
255
256    /**
257     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
258     *
259     * @param binaryData
260     *            binary data to encode
261     * @return Base64 characters chunked in 76 character blocks
262     */
263    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
264        return encodeBase64(binaryData, true);
265    }
266
267    /**
268     * Encodes binary data using the base64 algorithm but does not chunk the output.
269     *
270     * NOTE:  We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
271     * single-line non-chunking (commons-codec-1.5).
272     *
273     * @param binaryData
274     *            binary data to encode
275     * @return String containing Base64 characters.
276     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
277     */
278    public static String encodeBase64String(final byte[] binaryData) {
279        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
280    }
281
282    /**
283     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
284     * url-safe variation emits - and _ instead of + and / characters.
285     * <b>Note: no padding is added.</b>
286     * @param binaryData
287     *            binary data to encode
288     * @return byte[] containing Base64 characters in their UTF-8 representation.
289     * @since 1.4
290     */
291    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
292        return encodeBase64(binaryData, false, true);
293    }
294
295    /**
296     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
297     * url-safe variation emits - and _ instead of + and / characters.
298     * <b>Note: no padding is added.</b>
299     * @param binaryData
300     *            binary data to encode
301     * @return String containing Base64 characters
302     * @since 1.4
303     */
304    public static String encodeBase64URLSafeString(final byte[] binaryData) {
305        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
306    }
307
308    /**
309     * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
310     *
311     * @param bigInteger
312     *            a BigInteger
313     * @return A byte array containing base64 character data
314     * @throws NullPointerException
315     *             if null is passed in
316     * @since 1.4
317     */
318    public static byte[] encodeInteger(final BigInteger bigInteger) {
319        Objects.requireNonNull(bigInteger, "bigInteger");
320        return encodeBase64(toIntegerBytes(bigInteger), false);
321    }
322
323    /**
324     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
325     * method treats whitespace as valid.
326     *
327     * @param arrayOctet
328     *            byte array to test
329     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
330     *         {@code false}, otherwise
331     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
332     */
333    @Deprecated
334    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
335        return isBase64(arrayOctet);
336    }
337
338    /**
339     * Returns whether or not the {@code octet} is in the base 64 alphabet.
340     *
341     * @param octet
342     *            The value to test
343     * @return {@code true} if the value is defined in the the base 64 alphabet, {@code false} otherwise.
344     * @since 1.4
345     */
346    public static boolean isBase64(final byte octet) {
347        return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
348    }
349
350    /**
351     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
352     * method treats whitespace as valid.
353     *
354     * @param arrayOctet
355     *            byte array to test
356     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
357     *         {@code false}, otherwise
358     * @since 1.5
359     */
360    public static boolean isBase64(final byte[] arrayOctet) {
361        for (int i = 0; i < arrayOctet.length; i++) {
362            if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
363                return false;
364            }
365        }
366        return true;
367    }
368
369    /**
370     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
371     * method treats whitespace as valid.
372     *
373     * @param base64
374     *            String to test
375     * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
376     *         the String is empty; {@code false}, otherwise
377     *  @since 1.5
378     */
379    public static boolean isBase64(final String base64) {
380        return isBase64(StringUtils.getBytesUtf8(base64));
381    }
382
383    /**
384     * Returns a byte-array representation of a {@code BigInteger} without sign bit.
385     *
386     * @param bigInt
387     *            {@code BigInteger} to be converted
388     * @return a byte array representation of the BigInteger parameter
389     */
390    static byte[] toIntegerBytes(final BigInteger bigInt) {
391        int bitlen = bigInt.bitLength();
392        // round bitlen
393        bitlen = ((bitlen + 7) >> 3) << 3;
394        final byte[] bigBytes = bigInt.toByteArray();
395
396        if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
397            return bigBytes;
398        }
399        // set up params for copying everything but sign bit
400        int startSrc = 0;
401        int len = bigBytes.length;
402
403        // if bigInt is exactly byte-aligned, just skip signbit in copy
404        if ((bigInt.bitLength() % 8) == 0) {
405            startSrc = 1;
406            len--;
407        }
408        final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
409        final byte[] resizedBytes = new byte[bitlen / 8];
410        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
411        return resizedBytes;
412    }
413
    /**
     * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
     * between the two modes. It is chosen once, in the constructor, from the {@code urlSafe} flag.
     */
    private final byte[] encodeTable;

    // Only one decode table currently; keep for consistency with Base32 code
    private final byte[] decodeTable = DECODE_TABLE;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
     * Holds a private copy of the caller's array, or {@code null} when the constructor was given a
     * {@code null} separator or a line length that is not positive.
     */
    private final byte[] lineSeparator;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * Set by the constructor to {@code encodeSize - 1}, i.e. {@code 3 + lineSeparator.length} when a line
     * separator is in use and {@code 3} otherwise.
     */
    private final int decodeSize;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * Set by the constructor to {@code 4 + lineSeparator.length} when a line separator is in use and
     * {@code 4} otherwise.
     */
    private final int encodeSize;
440
    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     *
     * <p>
     * When decoding all variants are supported.
     * </p>
     */
    public Base64() {
        // A line length of 0 disables chunking.
        this(0);
    }
454
    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is
     * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true} and STANDARD_ENCODE_TABLE otherwise.
     * </p>
     *
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param urlSafe
     *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
     *            {@code false}.
     * @since 1.4
     */
    public Base64(final boolean urlSafe) {
        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
    }
473
    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @since 1.4
     */
    public Base64(final int lineLength) {
        // Delegates with the default chunk separator.
        this(lineLength, CHUNK_SEPARATOR);
    }
496
    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException
     *             Thrown when the provided lineSeparator contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator) {
        // urlSafe = false: encode with the standard alphabet.
        this(lineLength, lineSeparator, false);
    }
523
    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode, using the
     * default decoding policy.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true} and STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
     * @throws IllegalArgumentException
     *             Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
        this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT);
    }
554
555    /**
556     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
557     * <p>
558     * When encoding the line length and line separator are given in the constructor, and the encoding table is
559     * STANDARD_ENCODE_TABLE.
560     * </p>
561     * <p>
562     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
563     * </p>
564     * <p>
565     * When decoding all variants are supported.
566     * </p>
567     *
568     * @param lineLength
569     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
570     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
571     *            decoding.
572     * @param lineSeparator
573     *            Each line of encoded data will end with this sequence of bytes.
574     * @param urlSafe
575     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
576     *            operations. Decoding seamlessly handles both modes.
577     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
578     * @param decodingPolicy The decoding policy.
579     * @throws IllegalArgumentException
580     *             Thrown when the {@code lineSeparator} contains Base64 characters.
581     * @since 1.15
582     */
583    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, final CodecPolicy decodingPolicy) {
584        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
585                lineLength,
586                lineSeparator == null ? 0 : lineSeparator.length,
587                PAD_DEFAULT,
588                decodingPolicy);
589        // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
590        // @see test case Base64Test.testConstructors()
591        if (lineSeparator != null) {
592            if (containsAlphabetOrPad(lineSeparator)) {
593                final String sep = StringUtils.newStringUtf8(lineSeparator);
594                throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
595            }
596            if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE
597                this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
598                this.lineSeparator = new byte[lineSeparator.length];
599                System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
600            } else {
601                this.encodeSize = BYTES_PER_ENCODED_BLOCK;
602                this.lineSeparator = null;
603            }
604        } else {
605            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
606            this.lineSeparator = null;
607        }
608        this.decodeSize = this.encodeSize - 1;
609        this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
610    }
611
612    // Implementation of the Encoder Interface
613
614    /**
615     * <p>
616     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
617     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
618     * call is not necessary when decoding, but it doesn't hurt, either.
619     * </p>
620     * <p>
621     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
622     * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
623     * garbage-out philosophy: it will not check the provided data for validity.
624     * </p>
625     * <p>
626     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
627     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
628     * </p>
629     *
630     * @param in
631     *            byte[] array of ascii data to base64 decode.
632     * @param inPos
633     *            Position to start reading data from.
634     * @param inAvail
635     *            Amount of bytes available from input for decoding.
636     * @param context
637     *            the context to be used
638     */
639    @Override
640    void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
641        if (context.eof) {
642            return;
643        }
644        if (inAvail < 0) {
645            context.eof = true;
646        }
647        for (int i = 0; i < inAvail; i++) {
648            final byte[] buffer = ensureBufferSize(decodeSize, context);
649            final byte b = in[inPos++];
650            if (b == pad) {
651                // We're done.
652                context.eof = true;
653                break;
654            }
655            if (b >= 0 && b < DECODE_TABLE.length) {
656                final int result = DECODE_TABLE[b];
657                if (result >= 0) {
658                    context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
659                    context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
660                    if (context.modulus == 0) {
661                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
662                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
663                        buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
664                    }
665                }
666            }
667        }
668
669        // Two forms of EOF as far as base64 decoder is concerned: actual
670        // EOF (-1) and first time '=' character is encountered in stream.
671        // This approach makes the '=' padding characters completely optional.
672        if (context.eof && context.modulus != 0) {
673            final byte[] buffer = ensureBufferSize(decodeSize, context);
674
675            // We have some spare bits remaining
676            // Output all whole multiples of 8 bits and ignore the rest
677            switch (context.modulus) {
678//              case 0 : // impossible, as excluded above
679                case 1 : // 6 bits - either ignore entirely, or raise an exception
680                    validateTrailingCharacter();
681                    break;
682                case 2 : // 12 bits = 8 + 4
683                    validateCharacter(MASK_4BITS, context);
684                    context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
685                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
686                    break;
687                case 3 : // 18 bits = 8 + 8 + 2
688                    validateCharacter(MASK_2BITS, context);
689                    context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
690                    buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
691                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
692                    break;
693                default:
694                    throw new IllegalStateException("Impossible modulus " + context.modulus);
695            }
696        }
697    }
698
699    /**
700     * <p>
701     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
702     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
703     * remaining bytes (if not multiple of 3).
704     * </p>
705     * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
706     * <p>
707     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
708     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
709     * </p>
710     *
711     * @param in
712     *            byte[] array of binary data to base64 encode.
713     * @param inPos
714     *            Position to start reading data from.
715     * @param inAvail
716     *            Amount of bytes available from input for encoding.
717     * @param context
718     *            the context to be used
719     */
720    @Override
721    void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
722        if (context.eof) {
723            return;
724        }
725        // inAvail < 0 is how we're informed of EOF in the underlying data we're
726        // encoding.
727        if (inAvail < 0) {
728            context.eof = true;
729            if (0 == context.modulus && lineLength == 0) {
730                return; // no leftovers to process and not using chunking
731            }
732            final byte[] buffer = ensureBufferSize(encodeSize, context);
733            final int savedPos = context.pos;
734            switch (context.modulus) { // 0-2
735                case 0 : // nothing to do here
736                    break;
737                case 1 : // 8 bits = 6 + 2
738                    // top 6 bits:
739                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
740                    // remaining 2:
741                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
742                    // URL-SAFE skips the padding to further reduce size.
743                    if (encodeTable == STANDARD_ENCODE_TABLE) {
744                        buffer[context.pos++] = pad;
745                        buffer[context.pos++] = pad;
746                    }
747                    break;
748
749                case 2 : // 16 bits = 6 + 6 + 4
750                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
751                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
752                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
753                    // URL-SAFE skips the padding to further reduce size.
754                    if (encodeTable == STANDARD_ENCODE_TABLE) {
755                        buffer[context.pos++] = pad;
756                    }
757                    break;
758                default:
759                    throw new IllegalStateException("Impossible modulus " + context.modulus);
760            }
761            context.currentLinePos += context.pos - savedPos; // keep track of current line position
762            // if currentPos == 0 we are at the start of a line, so don't add CRLF
763            if (lineLength > 0 && context.currentLinePos > 0) {
764                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
765                context.pos += lineSeparator.length;
766            }
767        } else {
768            for (int i = 0; i < inAvail; i++) {
769                final byte[] buffer = ensureBufferSize(encodeSize, context);
770                context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
771                int b = in[inPos++];
772                if (b < 0) {
773                    b += 256;
774                }
775                context.ibitWorkArea = (context.ibitWorkArea << 8) + b; //  BITS_PER_BYTE
776                if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
777                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
778                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
779                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
780                    buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
781                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
782                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
783                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
784                        context.pos += lineSeparator.length;
785                        context.currentLinePos = 0;
786                    }
787                }
788            }
789        }
790    }
791
792    /**
793     * Returns whether or not the {@code octet} is in the Base64 alphabet.
794     *
795     * @param octet
796     *            The value to test
797     * @return {@code true} if the value is defined in the the Base64 alphabet {@code false} otherwise.
798     */
799    @Override
800    protected boolean isInAlphabet(final byte octet) {
801        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
802    }
803
804    /**
805     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
806     *
807     * @return true if we're in URL-SAFE mode, false otherwise.
808     * @since 1.4
809     */
810    public boolean isUrlSafe() {
811        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
812    }
813
814    /**
815     * Validates whether decoding the final trailing character is possible in the context
816     * of the set of possible base 64 values.
817     *
818     * <p>The character is valid if the lower bits within the provided mask are zero. This
819     * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
820     *
821     * @param emptyBitsMask The mask of the lower bits that should be empty
822     * @param context the context to be used
823     *
824     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
825     */
826    private void validateCharacter(final int emptyBitsMask, final Context context) {
827        if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
828            throw new IllegalArgumentException(
829                "Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
830                "Expected the discarded bits from the character to be zero.");
831        }
832    }
833
834    /**
835     * Validates whether decoding allows an entire final trailing character that cannot be
836     * used for a complete byte.
837     *
838     * @throws IllegalArgumentException if strict decoding is enabled
839     */
840    private void validateTrailingCharacter() {
841        if (isStrictDecoding()) {
842            throw new IllegalArgumentException(
843                "Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
844                "Decoding requires at least two trailing 6-bit characters to create bytes.");
845        }
846    }
847
848}