<?php
/*
 * Copyright 2007 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

namespace Zxing\Qrcode\Decoder;

use Zxing\DecodeHintType;
use Zxing\FormatException;
use Zxing\Common\BitSource;
use Zxing\Common\CharacterSetECI;
use Zxing\Common\DecoderResult;
use Zxing\Common\StringUtils;


/**
 * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
 * in one QR Code. This class decodes the bits back into text.</p>
 *
 * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
 *
 * @author Sean Owen
 */
final class DecodedBitStreamParser
{

    /**
     * Alphanumeric-mode character table, indexed by the encoded value.
     *
     * See ISO 18004:2006, 6.4.4 Table 5
     */
    private static $ALPHANUMERIC_CHARS = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
        'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        ' ', '$', '%', '*', '+', '-', '.', '/', ':',
    ];

    /**
     * Hanzi subset indicator value for which Hanzi segments are decoded (GB2312).
     */
    private static $GB2312_SUBSET = 1;

    /**
     * Walks the codeword bit stream segment by segment and rebuilds the text.
     *
     * Each iteration reads a 4-bit mode indicator and dispatches to the matching
     * segment decoder until a terminator is seen or fewer than 4 bits remain.
     *
     * @param mixed $bytes   raw codewords, wrapped in a BitSource
     * @param mixed $version QR version, used to look up the character-count field width
     * @param mixed $ecLevel error correction level of the symbol (may be null)
     * @param mixed $hints   decoding hints, forwarded to the byte-segment decoder
     *
     * @return DecoderResult decoded text plus byte segments and structured-append data
     *
     * @throws FormatException if the bit stream is malformed or truncated
     */
    public static function decode($bytes,
                                  $version,
                                  $ecLevel,
                                  $hints)
    {
        $bits           = new BitSource($bytes);
        $result         = '';
        $byteSegments   = [];
        $symbolSequence = -1;
        $parityData     = -1;

        try {
            $currentCharacterSetECI = null;
            $fc1InEffect            = false;
            do {
                // While still another segment to read...
                if ($bits->available() < 4) {
                    // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
                    $mode = Mode::$TERMINATOR;
                } else {
                    $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
                }

                if ($mode == Mode::$TERMINATOR) {
                    // Nothing to decode; the loop condition below ends the walk.
                    continue;
                }

                if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
                    // We do little with FNC1 except alter the parsed result a bit according to the spec
                    $fc1InEffect = true;
                } else if ($mode == Mode::$STRUCTURED_APPEND) {
                    if ($bits->available() < 16) {
                        throw FormatException::getFormatInstance();
                    }
                    // Sequence number and parity are attached to the result metadata below.
                    $symbolSequence = $bits->readBits(8);
                    $parityData     = $bits->readBits(8);
                } else if ($mode == Mode::$ECI) {
                    // Count doesn't apply to ECI
                    $value                  = self::parseECIValue($bits);
                    $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
                    if ($currentCharacterSetECI == null) {
                        throw FormatException::getFormatInstance();
                    }
                } else if ($mode == Mode::$HANZI) {
                    // Hanzi mode carries a 4-bit subset indicator right after the mode
                    // indicator, ahead of the character count.
                    $subset     = $bits->readBits(4);
                    $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
                    if ($subset == self::$GB2312_SUBSET) {
                        self::decodeHanziSegment($bits, $result, $countHanzi);
                    }
                } else {
                    // "Normal" QR code modes:
                    // How many characters will follow, encoded in this mode?
                    $count = $bits->readBits($mode->getCharacterCountBits($version));
                    if ($mode == Mode::$NUMERIC) {
                        self::decodeNumericSegment($bits, $result, $count);
                    } else if ($mode == Mode::$ALPHANUMERIC) {
                        self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
                    } else if ($mode == Mode::$BYTE) {
                        self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
                    } else if ($mode == Mode::$KANJI) {
                        self::decodeKanjiSegment($bits, $result, $count);
                    } else {
                        throw FormatException::getFormatInstance();
                    }
                }
            } while ($mode != Mode::$TERMINATOR);
        } catch (\InvalidArgumentException $iae) {
            // from readBits() calls
            throw FormatException::getFormatInstance();
        }

        // NOTE(review): the level string is hard-coded to 'L' whenever $ecLevel is
        // non-null; the ErrorCorrectionLevel::toString($ecLevel) call it stands in
        // for is not wired up. Confirm before relying on this field.
        return new DecoderResult($bytes,
            $result,
            empty($byteSegments) ? null : $byteSegments,
            $ecLevel == null ? null : 'L',
            $symbolSequence,
            $parityData);
    }

    /**
     * Reads a 1-, 2- or 3-byte ECI designator; the leading bits of the first
     * byte select the length.
     *
     * @param BitSource $bits bit stream positioned at the ECI designator
     *
     * @return int the ECI assignment value
     *
     * @throws FormatException if the first byte matches none of the three length prefixes
     */
    private static function parseECIValue($bits)
    {
        $firstByte = $bits->readBits(8);
        if (($firstByte & 0x80) == 0) {
            // just one byte
            return $firstByte & 0x7F;
        }
        if (($firstByte & 0xC0) == 0x80) {
            // two bytes
            $secondByte = $bits->readBits(8);

            return (($firstByte & 0x3F) << 8) | $secondByte;
        }
        if (($firstByte & 0xE0) == 0xC0) {
            // three bytes
            $secondThirdBytes = $bits->readBits(16);

            return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
        }
        throw FormatException::getFormatInstance();
    }

    /**
     * Decodes a Hanzi segment (13 bits per character) and appends it to $result as UTF-8.
     *
     * See specification GBT 18284-2000
     *
     * @param BitSource $bits   bit stream positioned at the segment data
     * @param string    $result decoded text so far; appended to in place
     * @param int       $count  number of characters in the segment
     *
     * @throws FormatException if the stream is too short or the bytes cannot be converted
     */
    private static function decodeHanziSegment($bits,
                                               &$result,
                                               $count)
    {
        // Don't crash trying to read more bits than we have available.
        if ($count * 13 > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        // Each character becomes 2 raw GB2312 bytes; they are converted afterwards.
        $gbBytes = '';
        for ($remaining = $count; $remaining > 0; $remaining--) {
            // Each 13 bits encodes a 2-byte character
            $twoBytes          = $bits->readBits(13);
            $assembledTwoBytes = (((int) ($twoBytes / 0x060)) << 8) | ($twoBytes % 0x060);
            // The high byte of the reassembled value is 0x00..0x09 for the first
            // range, so the boundary on the reassembled value is 0x0A00 (0x3BF
            // would be the boundary of the raw 13-bit value instead).
            if ($assembledTwoBytes < 0x00A00) {
                // In the 0xA1A1 to 0xAAFE range
                $assembledTwoBytes += 0x0A1A1;
            } else {
                // In the 0xB0A1 to 0xFAFE range
                $assembledTwoBytes += 0x0A6A1;
            }
            $gbBytes .= chr(($assembledTwoBytes >> 8) & 0xFF) . chr($assembledTwoBytes & 0xFF);
        }

        self::appendConverted($result, $gbBytes, 'GB2312');
    }

    /**
     * Decodes a numeric segment: 10 bits per 3 digits, then 7 bits for 2 or
     * 4 bits for 1 leftover digit.
     *
     * @param BitSource $bits   bit stream positioned at the segment data
     * @param string    $result decoded text so far; appended to in place
     * @param int       $count  number of digits in the segment
     *
     * @throws FormatException if the stream is too short or a group is out of range
     */
    private static function decodeNumericSegment($bits,
                                                 &$result,
                                                 $count)
    {
        // Read three digits at a time
        while ($count >= 3) {
            // Each 10 bits encodes three digits
            if ($bits->available() < 10) {
                throw FormatException::getFormatInstance();
            }
            $threeDigitsBits = $bits->readBits(10);
            if ($threeDigitsBits >= 1000) {
                throw FormatException::getFormatInstance();
            }
            // Explicit int casts: "/" yields a float, which must not be used as an array index.
            $result .= self::toAlphaNumericChar((int) ($threeDigitsBits / 100));
            $result .= self::toAlphaNumericChar(((int) ($threeDigitsBits / 10)) % 10);
            $result .= self::toAlphaNumericChar($threeDigitsBits % 10);
            $count  -= 3;
        }
        if ($count == 2) {
            // Two digits left over to read, encoded in 7 bits
            if ($bits->available() < 7) {
                throw FormatException::getFormatInstance();
            }
            $twoDigitsBits = $bits->readBits(7);
            if ($twoDigitsBits >= 100) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar((int) ($twoDigitsBits / 10));
            $result .= self::toAlphaNumericChar($twoDigitsBits % 10);
        } else if ($count == 1) {
            // One digit left over to read
            if ($bits->available() < 4) {
                throw FormatException::getFormatInstance();
            }
            $digitBits = $bits->readBits(4);
            if ($digitBits >= 10) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar($digitBits);
        }
    }

    /**
     * Looks up one entry of the alphanumeric character table.
     *
     * @param int $value index into the table
     *
     * @return string the single character at that index
     *
     * @throws FormatException if $value is outside the table
     */
    private static function toAlphaNumericChar($value)
    {
        if ($value < 0 || $value >= count(self::$ALPHANUMERIC_CHARS)) {
            throw FormatException::getFormatInstance();
        }

        return self::$ALPHANUMERIC_CHARS[$value];
    }

    /**
     * Decodes an alphanumeric segment: 11 bits per character pair, 6 bits for
     * a trailing single character.
     *
     * When FNC1 is in effect, the text appended by this call is post-processed:
     * "%%" collapses to "%" and a lone "%" becomes the separator byte 0x1D.
     *
     * @param BitSource $bits        bit stream positioned at the segment data
     * @param string    $result      decoded text so far; appended to in place
     * @param int       $count       number of characters in the segment
     * @param bool      $fc1InEffect whether an FNC1 mode indicator was seen earlier
     *
     * @throws FormatException if the stream is too short or a value is out of range
     */
    private static function decodeAlphanumericSegment($bits,
                                                      &$result,
                                                      $count,
                                                      $fc1InEffect)
    {
        // Byte offset where this segment starts; "%" handling is limited to it.
        $start = strlen($result);

        // Read two characters at a time
        while ($count > 1) {
            if ($bits->available() < 11) {
                throw FormatException::getFormatInstance();
            }
            $nextTwoCharsBits = $bits->readBits(11);
            $result           .= self::toAlphaNumericChar((int) ($nextTwoCharsBits / 45));
            $result           .= self::toAlphaNumericChar($nextTwoCharsBits % 45);
            $count            -= 2;
        }
        if ($count == 1) {
            // special case: one character left
            if ($bits->available() < 6) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar($bits->readBits(6));
        }

        // See section 6.4.8.1, 6.4.8.2
        if ($fc1InEffect) {
            // strlen() is re-evaluated every pass because the string can shrink.
            for ($i = $start; $i < strlen($result); $i++) {
                if ($result[$i] != '%') {
                    continue;
                }
                if ($i < strlen($result) - 1 && $result[$i + 1] == '%') {
                    // %% is rendered as %
                    $result = substr_replace($result, '', $i + 1, 1);
                } else {
                    // In alpha mode, % should be converted to FNC1 separator 0x1D
                    $result[$i] = chr(0x1D);
                }
            }
        }
    }

    /**
     * Decodes a byte-mode segment: reads $count 8-bit values, appends them to
     * $result verbatim, and merges the byte values into $byteSegments.
     *
     * No character-set conversion is applied; the bytes are appended exactly
     * as read. $currentCharacterSetECI and $hints are accepted but do not
     * influence the output.
     *
     * @param BitSource $bits                   bit stream positioned at the segment data
     * @param string    $result                 decoded text so far; appended to in place
     * @param int       $count                  number of bytes in the segment
     * @param mixed     $currentCharacterSetECI active ECI character set, if any (unused)
     * @param array     $byteSegments           accumulated byte values; extended in place
     * @param mixed     $hints                  decoding hints (unused)
     *
     * @throws FormatException if fewer than 8 * $count bits remain
     */
    private static function decodeByteSegment($bits,
                                              &$result,
                                              $count,
                                              $currentCharacterSetECI,
                                              &$byteSegments,
                                              $hints)
    {
        // Don't crash trying to read more bits than we have available.
        if (8 * $count > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        $readBytes = [];
        $text      = '';
        for ($i = 0; $i < $count; $i++) {
            $byte        = $bits->readBits(8);
            $readBytes[] = $byte;
            $text        .= chr($byte);
        }

        // The spec isn't clear on this mode; see section 6.4.5: it does not say
        // which encoding to assume upon decoding. ISO-8859-1 as well as Shift_JIS
        // have been seen in the wild without an ECI designator to give a hint,
        // so the raw bytes are passed through untouched.
        $result .= $text;

        // The byte values are merged flat into the accumulator (one list of
        // bytes, not one entry per segment).
        $byteSegments = array_merge($byteSegments, $readBytes);
    }

    /**
     * Decodes a Kanji segment (13 bits per character) and appends it to $result as UTF-8.
     *
     * @param BitSource $bits   bit stream positioned at the segment data
     * @param string    $result decoded text so far; appended to in place
     * @param int       $count  number of characters in the segment
     *
     * @throws FormatException if the stream is too short or the bytes cannot be converted
     */
    private static function decodeKanjiSegment($bits,
                                               &$result,
                                               $count)
    {
        // Don't crash trying to read more bits than we have available.
        if ($count * 13 > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        // Each character becomes 2 raw Shift_JIS bytes; they are converted afterwards.
        $sjisBytes = '';
        for ($remaining = $count; $remaining > 0; $remaining--) {
            // Each 13 bits encodes a 2-byte character
            $twoBytes          = $bits->readBits(13);
            $assembledTwoBytes = (((int) ($twoBytes / 0x0C0)) << 8) | ($twoBytes % 0x0C0);
            if ($assembledTwoBytes < 0x01F00) {
                // In the 0x8140 to 0x9FFC range
                $assembledTwoBytes += 0x08140;
            } else {
                // In the 0xE040 to 0xEBBF range
                $assembledTwoBytes += 0x0C140;
            }
            $sjisBytes .= chr(($assembledTwoBytes >> 8) & 0xFF) . chr($assembledTwoBytes & 0xFF);
        }

        // Shift_JIS may not be supported in some environments; a failed
        // conversion is reported as a FormatException.
        self::appendConverted($result, $sjisBytes, 'shift-jis');
    }

    /**
     * Converts raw multi-byte text to UTF-8 with iconv and appends it to $result.
     *
     * @param string $result        decoded text so far; appended to in place
     * @param string $rawBytes      bytes in the source character set
     * @param string $sourceCharset iconv name of the source character set
     *
     * @throws FormatException if iconv cannot convert the bytes
     */
    private static function appendConverted(&$result, $rawBytes, $sourceCharset)
    {
        if ($rawBytes === '') {
            return;
        }
        $converted = iconv($sourceCharset, 'UTF-8', $rawBytes);
        if ($converted === false) {
            throw FormatException::getFormatInstance();
        }
        $result .= $converted;
    }

    /**
     * Static-only utility class; not meant to be instantiated.
     */
    private function __construct()
    {

    }
}