001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 */
035public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036    /*
037     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038     * it.
039     */
040    /** Empty char array. */
041    private static final char[] EMPTY_CHAR_ARRAY = new char[0];
042
043    /** Empty byte array. */
044    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
045
046    /** Mask for bit 0 of a byte. */
047    private static final int BIT_0 = 1;
048
049    /** Mask for bit 1 of a byte. */
050    private static final int BIT_1 = 0x02;
051
052    /** Mask for bit 2 of a byte. */
053    private static final int BIT_2 = 0x04;
054
055    /** Mask for bit 3 of a byte. */
056    private static final int BIT_3 = 0x08;
057
058    /** Mask for bit 4 of a byte. */
059    private static final int BIT_4 = 0x10;
060
061    /** Mask for bit 5 of a byte. */
062    private static final int BIT_5 = 0x20;
063
064    /** Mask for bit 6 of a byte. */
065    private static final int BIT_6 = 0x40;
066
067    /** Mask for bit 7 of a byte. */
068    private static final int BIT_7 = 0x80;
069
070    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071
072    /**
073     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
074     *
075     * @param ascii
076     *                  each byte represents an ASCII '0' or '1'
077     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
078     */
079    public static byte[] fromAscii(final byte[] ascii) {
080        if (isEmpty(ascii)) {
081            return EMPTY_BYTE_ARRAY;
082        }
083        // get length/8 times bytes with 3 bit shifts to the right of the length
084        final byte[] l_raw = new byte[ascii.length >> 3];
085        /*
086         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
087         * loop.
088         */
089        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
090            for (int bits = 0; bits < BITS.length; ++bits) {
091                if (ascii[jj - bits] == '1') {
092                    l_raw[ii] |= BITS[bits];
093                }
094            }
095        }
096        return l_raw;
097    }
098
099    // ------------------------------------------------------------------------
100    //
101    // static codec operations
102    //
103    // ------------------------------------------------------------------------
104    /**
105     * Decodes a char array where each char represents an ASCII '0' or '1'.
106     *
107     * @param ascii
108     *                  each char represents an ASCII '0' or '1'
109     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
110     */
111    public static byte[] fromAscii(final char[] ascii) {
112        if (ascii == null || ascii.length == 0) {
113            return EMPTY_BYTE_ARRAY;
114        }
115        // get length/8 times bytes with 3 bit shifts to the right of the length
116        final byte[] l_raw = new byte[ascii.length >> 3];
117        /*
118         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
119         * loop.
120         */
121        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
122            for (int bits = 0; bits < BITS.length; ++bits) {
123                if (ascii[jj - bits] == '1') {
124                    l_raw[ii] |= BITS[bits];
125                }
126            }
127        }
128        return l_raw;
129    }
130
131    /**
132     * Returns {@code true} if the given array is {@code null} or empty (size 0.)
133     *
134     * @param array
135     *            the source array
136     * @return {@code true} if the given array is {@code null} or empty (size 0.)
137     */
138    private static boolean isEmpty(final byte[] array) {
139        return array == null || array.length == 0;
140    }
141
142    /**
143     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
144     * char.
145     *
146     * @param raw
147     *                  the raw binary data to convert
148     * @return an array of 0 and 1 character bytes for each bit of the argument
149     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
150     */
151    public static byte[] toAsciiBytes(final byte[] raw) {
152        if (isEmpty(raw)) {
153            return EMPTY_BYTE_ARRAY;
154        }
155        // get 8 times the bytes with 3 bit shifts to the left of the length
156        final byte[] l_ascii = new byte[raw.length << 3];
157        /*
158         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
159         * loop.
160         */
161        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
162            for (int bits = 0; bits < BITS.length; ++bits) {
163                if ((raw[ii] & BITS[bits]) == 0) {
164                    l_ascii[jj - bits] = '0';
165                } else {
166                    l_ascii[jj - bits] = '1';
167                }
168            }
169        }
170        return l_ascii;
171    }
172
173    /**
174     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
175     *
176     * @param raw
177     *                  the raw binary data to convert
178     * @return an array of 0 and 1 characters for each bit of the argument
179     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
180     */
181    public static char[] toAsciiChars(final byte[] raw) {
182        if (isEmpty(raw)) {
183            return EMPTY_CHAR_ARRAY;
184        }
185        // get 8 times the bytes with 3 bit shifts to the left of the length
186        final char[] l_ascii = new char[raw.length << 3];
187        /*
188         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
189         * loop.
190         */
191        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
192            for (int bits = 0; bits < BITS.length; ++bits) {
193                if ((raw[ii] & BITS[bits]) == 0) {
194                    l_ascii[jj - bits] = '0';
195                } else {
196                    l_ascii[jj - bits] = '1';
197                }
198            }
199        }
200        return l_ascii;
201    }
202
203    /**
204     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
205     *
206     * @param raw
207     *                  the raw binary data to convert
208     * @return a String of 0 and 1 characters representing the binary data
209     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
210     */
211    public static String toAsciiString(final byte[] raw) {
212        return new String(toAsciiChars(raw));
213    }
214
215    /**
216     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
217     *
218     * @param ascii
219     *                  each byte represents an ASCII '0' or '1'
220     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
221     * @see org.apache.commons.codec.Decoder#decode(Object)
222     */
223    @Override
224    public byte[] decode(final byte[] ascii) {
225        return fromAscii(ascii);
226    }
227
228    /**
229     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
230     *
231     * @param ascii
232     *                  each byte represents an ASCII '0' or '1'
233     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
234     * @throws DecoderException
235     *                  if argument is not a byte[], char[] or String
236     * @see org.apache.commons.codec.Decoder#decode(Object)
237     */
238    @Override
239    public Object decode(final Object ascii) throws DecoderException {
240        if (ascii == null) {
241            return EMPTY_BYTE_ARRAY;
242        }
243        if (ascii instanceof byte[]) {
244            return fromAscii((byte[]) ascii);
245        }
246        if (ascii instanceof char[]) {
247            return fromAscii((char[]) ascii);
248        }
249        if (ascii instanceof String) {
250            return fromAscii(((String) ascii).toCharArray());
251        }
252        throw new DecoderException("argument not a byte array");
253    }
254
255    /**
256     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
257     *
258     * @param raw
259     *                  the raw binary data to convert
260     * @return 0 and 1 ASCII character bytes one for each bit of the argument
261     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
262     */
263    @Override
264    public byte[] encode(final byte[] raw) {
265        return toAsciiBytes(raw);
266    }
267
268    /**
269     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
270     *
271     * @param raw
272     *                  the raw binary data to convert
273     * @return 0 and 1 ASCII character chars one for each bit of the argument
274     * @throws EncoderException
275     *                  if the argument is not a byte[]
276     * @see org.apache.commons.codec.Encoder#encode(Object)
277     */
278    @Override
279    public Object encode(final Object raw) throws EncoderException {
280        if (!(raw instanceof byte[])) {
281            throw new EncoderException("argument not a byte array");
282        }
283        return toAsciiChars((byte[]) raw);
284    }
285
286    /**
287     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
288     *
289     * @param ascii
290     *                  String of '0' and '1' characters
291     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
292     * @see org.apache.commons.codec.Decoder#decode(Object)
293     */
294    public byte[] toByteArray(final String ascii) {
295        if (ascii == null) {
296            return EMPTY_BYTE_ARRAY;
297        }
298        return fromAscii(ascii.toCharArray());
299    }
300}