diff --git a/app/src/main/java/io/legado/app/utils/EncodingDetect.kt b/app/src/main/java/io/legado/app/utils/EncodingDetect.kt index 8447ed238..4d1a53037 100644 --- a/app/src/main/java/io/legado/app/utils/EncodingDetect.kt +++ b/app/src/main/java/io/legado/app/utils/EncodingDetect.kt @@ -1,4492 +1,102 @@ -package io.legado.app.utils; +package io.legado.app.utils -import androidx.annotation.NonNull; - -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.net.URL; -import java.nio.charset.StandardCharsets; - -import static android.text.TextUtils.isEmpty; +import android.text.TextUtils +import io.legado.app.utils.icu4j.CharsetDetector +import org.jsoup.Jsoup +import java.io.File +import java.io.FileInputStream +import java.nio.charset.StandardCharsets +import java.util.* /** - * Copyright (C) <2009> - *

+ * Copyright (C) <2009> ,ACC http://androidos.cc/dev> + * + * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. - *

+ * + * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. - *

- * EncodingDetect.java
+ * + * + * EncodingDetect.java

* 自动获取文件的编码 * * @author Billows.Van * @version 1.0 * @since Create on 2010-01-27 11:19:00 - */ -@SuppressWarnings("ALL") -public class EncodingDetect { + */ +@Suppress("MemberVisibilityCanBePrivate", "unused") +object EncodingDetect { - public static String getHtmlEncode(@NonNull byte[] bytes) { + fun getHtmlEncode(bytes: ByteArray): String? { try { - Document doc = Jsoup.parse(new String(bytes, StandardCharsets.UTF_8)); - Elements metaTags = doc.getElementsByTag("meta"); - String charsetStr; - for (Element metaTag : metaTags) { - charsetStr = metaTag.attr("charset"); - if (!isEmpty(charsetStr)) { - return charsetStr; + val doc = Jsoup.parse(String(bytes, StandardCharsets.UTF_8)) + val metaTags = doc.getElementsByTag("meta") + var charsetStr: String + for (metaTag in metaTags) { + charsetStr = metaTag.attr("charset") + if (!TextUtils.isEmpty(charsetStr)) { + return charsetStr } - String content = metaTag.attr("content"); - String http_equiv = metaTag.attr("http-equiv"); - if (http_equiv.toLowerCase().equals("content-type")) { - if (content.toLowerCase().contains("charset")) { - charsetStr = content.substring(content.toLowerCase().indexOf("charset") + "charset=".length()); + val content = metaTag.attr("content") + val httpEquiv = metaTag.attr("http-equiv") + if (httpEquiv.toLowerCase(Locale.getDefault()) == "content-type") { + charsetStr = if (content.toLowerCase(Locale.getDefault()).contains("charset")) { + content.substring( + content.toLowerCase(Locale.getDefault()) + .indexOf("charset") + "charset=".length + ) } else { - charsetStr = content.substring(content.toLowerCase().indexOf(";") + 1); + content.substring(content.toLowerCase(Locale.getDefault()).indexOf(";") + 1) } - if (!isEmpty(charsetStr)) { - return charsetStr; + if (!TextUtils.isEmpty(charsetStr)) { + return charsetStr } } } - } catch (Exception ignored) { + } catch (ignored: Exception) { } - return getEncode(bytes); + return getEncode(bytes) } - public static String getEncode(@NonNull byte[] bytes) { 
- int len = Math.min(bytes.length, 2000); - byte[] cBytes = new byte[len]; - System.arraycopy(bytes, 0, cBytes, 0, len); - BytesEncodingDetect bytesEncodingDetect = new BytesEncodingDetect(); - String code = BytesEncodingDetect.javaname[bytesEncodingDetect.detectEncoding(cBytes)]; - // UTF-16LE 特殊处理 - if ("Unicode".equals(code)) { - if (cBytes[0] == -1) { - code = "UTF-16LE"; - } - } - return code; + fun getEncode(bytes: ByteArray): String { + val detector = CharsetDetector() + detector.setText(bytes) + val match = detector.detect() + return match.name } /** * 得到文件的编码 */ - public static String getEncode(@NonNull String filePath) { - BytesEncodingDetect s = new BytesEncodingDetect(); - String fileCode = BytesEncodingDetect.javaname[s - .detectEncoding(new File(filePath))]; - - // UTF-16LE 特殊处理 - if ("Unicode".equals(fileCode)) { - byte[] tempByte = BytesEncodingDetect.getFileBytes(new File( - filePath)); - if (tempByte[0] == -1) { - fileCode = "UTF-16LE"; - } - } - return fileCode; + fun getEncode(filePath: String): String { + return getEncode(File(filePath)) } /** * 得到文件的编码 */ - public static String getEncode(@NonNull File file) { - BytesEncodingDetect s = new BytesEncodingDetect(); - String fileCode = BytesEncodingDetect.javaname[s.detectEncoding(file)]; - // UTF-16LE 特殊处理 - if ("Unicode".equals(fileCode)) { - byte[] tempByte = BytesEncodingDetect.getFileBytes(file); - if (tempByte[0] == -1) { - fileCode = "UTF-16LE"; - } - } - return fileCode; + fun getEncode(file: File): String { + val tempByte = getFileBytes(file) + return getEncode(tempByte) } -} - -@SuppressWarnings("ALL") -class BytesEncodingDetect extends Encoding { - // Frequency tables to hold the GB, Big5, and EUC-TW character - // frequencies - private int[][] GBFreq; - - private int[][] GBKFreq; - - private int[][] Big5Freq; - - private int[][] Big5PFreq; - - private int[][] EUC_TWFreq; - - private int[][] KRFreq; - - private int[][] JPFreq; - - public boolean debug; - - BytesEncodingDetect() { - 
super(); - debug = false; - GBFreq = new int[94][94]; - GBKFreq = new int[126][191]; - Big5Freq = new int[94][158]; - Big5PFreq = new int[126][191]; - EUC_TWFreq = new int[94][94]; - KRFreq = new int[94][94]; - JPFreq = new int[94][94]; - // Initialize the Frequency Table for GB, GBK, Big5, EUC-TW, KR, JP - initialize_frequencies(); - } - - /** - * Function : detectEncoding Aruguments: URL Returns : One of the encodings - * from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII, or OTHER) - * Description: This function looks at the URL contents and assigns it a - * probability score for each encoding type. The encoding type with the - * highest probability is returned. - */ - public int detectEncoding(URL testurl) { - byte[] rawtext = new byte[10000]; - int bytesread = 0, byteoffset = 0; - int guess = OTHER; - InputStream chinesestream; + private fun getFileBytes(testFile: File?): ByteArray { + val fis: FileInputStream + val byteArray: ByteArray = ByteArray(2000) try { - chinesestream = testurl.openStream(); - while ((bytesread = chinesestream.read(rawtext, byteoffset, - rawtext.length - byteoffset)) > 0) { - byteoffset += bytesread; - } - chinesestream.close(); - guess = detectEncoding(rawtext); - } catch (Exception e) { - System.err.println("Error loading or using URL " + e.toString()); - guess = -1; - } - return guess; - } - - /** - * Function : detectEncoding Aruguments: File Returns : One of the encodings - * from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII, or OTHER) - * Description: This function looks at the file and assigns it a probability - * score for each encoding type. The encoding type with the highest - * probability is returned. 
- */ - int detectEncoding(File testfile) { - byte[] rawtext = getFileBytes(testfile); - return detectEncoding(rawtext); - } - - static byte[] getFileBytes(File testfile) { - FileInputStream chinesefile; - byte[] rawtext; - rawtext = new byte[2000]; - try { - chinesefile = new FileInputStream(testfile); - chinesefile.read(rawtext); - chinesefile.close(); - } catch (Exception e) { - System.err.println("Error: " + e); - } - return rawtext; - } - - - /** - * Function : detectEncoding Aruguments: byte array Returns : One of the - * encodings from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII, - * or OTHER) Description: This function looks at the byte array and assigns - * it a probability score for each encoding type. The encoding type with the - * highest probability is returned. - */ - int detectEncoding(byte[] rawtext) { - int[] scores; - int index, maxscore = 0; - int encoding_guess = OTHER; - scores = new int[TOTALTYPES]; - // Assign Scores - scores[GB2312] = gb2312_probability(rawtext); - scores[GBK] = gbk_probability(rawtext); - scores[GB18030] = gb18030_probability(rawtext); - scores[HZ] = hz_probability(rawtext); - scores[BIG5] = big5_probability(rawtext); - scores[CNS11643] = euc_tw_probability(rawtext); - scores[ISO2022CN] = iso_2022_cn_probability(rawtext); - scores[UTF8] = utf8_probability(rawtext); - scores[UNICODE] = utf16_probability(rawtext); - scores[EUC_KR] = euc_kr_probability(rawtext); - scores[CP949] = cp949_probability(rawtext); - scores[JOHAB] = 0; - scores[ISO2022KR] = iso_2022_kr_probability(rawtext); - scores[ASCII] = ascii_probability(rawtext); - scores[SJIS] = sjis_probability(rawtext); - scores[EUC_JP] = euc_jp_probability(rawtext); - scores[ISO2022JP] = iso_2022_jp_probability(rawtext); - scores[UNICODET] = 0; - scores[UNICODES] = 0; - scores[ISO2022CN_GB] = 0; - scores[ISO2022CN_CNS] = 0; - scores[OTHER] = 0; - // Tabulate Scores - for (index = 0; index < TOTALTYPES; index++) { - if (debug) - System.err.println("Encoding " + 
nicename[index] + " score " - + scores[index]); - if (scores[index] > maxscore) { - encoding_guess = index; - maxscore = scores[index]; - } - } - // Return OTHER if nothing scored above 50 - if (maxscore <= 50) { - encoding_guess = OTHER; - } - return encoding_guess; - } - - /* - * Function: gb2312_probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses GB-2312 - * encoding - */ - int gb2312_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, gbchars = 1; - long gbfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 - && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - gbchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - if (GBFreq[row][column] != 0) { - gbfreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - // In GB high-freq character range - gbfreq += 200; - } - } - i++; - } - } - rangeval = 50 * ((float) gbchars / (float) dbchars); - freqval = 50 * ((float) gbfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - /* - * Function: gbk_probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses GBK - * encoding - */ - int gbk_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, gbchars = 1; - long gbfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if 
((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 - && // Original GB range - (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - gbchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - // System.out.println("original row " + row + " column " + - // column); - if (GBFreq[row][column] != 0) { - gbfreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - gbfreq += 200; - } - } else if ((byte) 0x81 <= rawtext[i] - && rawtext[i] <= (byte) 0xFE && // Extended GB range - (((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E))) { - gbchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0x81; - if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) { - column = rawtext[i + 1] - 0x40; - } else { - column = rawtext[i + 1] + 256 - 0x40; - } - // System.out.println("extended row " + row + " column " + - // column + " rawtext[i] " + rawtext[i]); - if (GBKFreq[row][column] != 0) { - gbfreq += GBKFreq[row][column]; - } - } - i++; - } - } - rangeval = 50 * ((float) gbchars / (float) dbchars); - freqval = 50 * ((float) gbfreq / (float) totalfreq); - // For regular GB files, this would give the same score, so I handicap - // it slightly - return (int) (rangeval + freqval) - 1; - } - - /* - * Function: gb18030_probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses GBK - * encoding - */ - int gb18030_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, gbchars = 1; - long gbfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 - && // 
Original GB range - i + 1 < rawtextlen && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - gbchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - // System.out.println("original row " + row + " column " + - // column); - if (GBFreq[row][column] != 0) { - gbfreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - gbfreq += 200; - } - } else if ((byte) 0x81 <= rawtext[i] - && rawtext[i] <= (byte) 0xFE - && // Extended GB range - i + 1 < rawtextlen - && (((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E))) { - gbchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0x81; - if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) { - column = rawtext[i + 1] - 0x40; - } else { - column = rawtext[i + 1] + 256 - 0x40; - } - // System.out.println("extended row " + row + " column " + - // column + " rawtext[i] " + rawtext[i]); - if (GBKFreq[row][column] != 0) { - gbfreq += GBKFreq[row][column]; - } - } else if ((byte) 0x81 <= rawtext[i] - && rawtext[i] <= (byte) 0xFE - && // Extended GB range - i + 3 < rawtextlen && (byte) 0x30 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0x39 - && (byte) 0x81 <= rawtext[i + 2] - && rawtext[i + 2] <= (byte) 0xFE - && (byte) 0x30 <= rawtext[i + 3] - && rawtext[i + 3] <= (byte) 0x39) { - gbchars++; - } - i++; - } + fis = FileInputStream(testFile) + fis.read(byteArray) + fis.close() + } catch (e: Exception) { + System.err.println("Error: $e") } - rangeval = 50 * ((float) gbchars / (float) dbchars); - freqval = 50 * ((float) gbfreq / (float) totalfreq); - // For regular GB files, this would give the same score, so I handicap - // it slightly - return (int) (rangeval + freqval) - 1; + return byteArray } - - /* - * Function: hz_probability Argument: byte array Returns : number from 0 to - * 100 representing probability text in array uses HZ encoding - */ - int hz_probability(byte[] 
rawtext) { - int i, rawtextlen; - int hzchars = 0, dbchars = 1; - long hzfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int hzstart = 0, hzend = 0; - int row, column; - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen; i++) { - if (rawtext[i] == '~') { - if (rawtext[i + 1] == '{') { - hzstart++; - i += 2; - while (i < rawtextlen - 1) { - if (rawtext[i] == 0x0A || rawtext[i] == 0x0D) { - break; - } else if (rawtext[i] == '~' && rawtext[i + 1] == '}') { - hzend++; - i++; - break; - } else if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77) - && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77)) { - hzchars += 2; - row = rawtext[i] - 0x21; - column = rawtext[i + 1] - 0x21; - totalfreq += 500; - if (GBFreq[row][column] != 0) { - hzfreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - hzfreq += 200; - } - } else if ((0xA1 <= rawtext[i] && rawtext[i] <= 0xF7) - && (0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= 0xF7)) { - hzchars += 2; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - totalfreq += 500; - if (GBFreq[row][column] != 0) { - hzfreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - hzfreq += 200; - } - } - dbchars += 2; - i += 2; - } - } else if (rawtext[i + 1] == '}') { - hzend++; - i++; - } else if (rawtext[i + 1] == '~') { - i++; - } - } - } - if (hzstart > 4) { - rangeval = 50; - } else if (hzstart > 1) { - rangeval = 41; - } else if (hzstart > 0) { // Only 39 in case the sequence happened to - // occur - rangeval = 39; // in otherwise non-Hz text - } else { - rangeval = 0; - } - freqval = 50 * ((float) hzfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - /** - * Function: big5_probability Argument: byte array Returns : number from 0 - * to 100 representing probability text in array uses Big5 encoding - */ - int big5_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, bfchars = 1; - float rangeval = 0, freqval = 0; - long bffreq = 0, totalfreq 
= 1; - int row, column; - // Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0xA1 <= rawtext[i] - && rawtext[i] <= (byte) 0xF9 - && (((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E) || ((byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE))) { - bfchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) { - column = rawtext[i + 1] - 0x40; - } else { - column = rawtext[i + 1] + 256 - 0x61; - } - if (Big5Freq[row][column] != 0) { - bffreq += Big5Freq[row][column]; - } else if (3 <= row && row <= 37) { - bffreq += 200; - } - } - i++; - } - } - rangeval = 50 * ((float) bfchars / (float) dbchars); - freqval = 50 * ((float) bffreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - /* - * Function: big5plus_probability Argument: pointer to unsigned char array - * Returns : number from 0 to 100 representing probability text in array - * uses Big5+ encoding - */ - int big5plus_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, bfchars = 1; - long bffreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 128) { - dbchars++; - if (0xA1 <= rawtext[i] - && rawtext[i] <= 0xF9 - && // Original Big5 range - ((0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) || (0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= 0xFE))) { - bfchars++; - totalfreq += 500; - row = rawtext[i] - 0xA1; - if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) { - column = rawtext[i + 1] - 0x40; - } else { - column = rawtext[i + 1] - 0x61; - } - // System.out.println("original row " + row + " column " + - // column); - if (Big5Freq[row][column] != 
0) { - bffreq += Big5Freq[row][column]; - } else if (3 <= row && row < 37) { - bffreq += 200; - } - } else if (0x81 <= rawtext[i] - && rawtext[i] <= 0xFE - && // Extended Big5 range - ((0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) || (0x80 <= rawtext[i + 1] && rawtext[i + 1] <= 0xFE))) { - bfchars++; - totalfreq += 500; - row = rawtext[i] - 0x81; - if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) { - column = rawtext[i + 1] - 0x40; - } else { - column = rawtext[i + 1] - 0x40; - } - // System.out.println("extended row " + row + " column " + - // column + " rawtext[i] " + rawtext[i]); - if (Big5PFreq[row][column] != 0) { - bffreq += Big5PFreq[row][column]; - } - } - i++; - } - } - rangeval = 50 * ((float) bfchars / (float) dbchars); - freqval = 50 * ((float) bffreq / (float) totalfreq); - // For regular Big5 files, this would give the same score, so I handicap - // it slightly - return (int) (rangeval + freqval) - 1; - } - - /* - * Function: euc_tw_probability Argument: byte array Returns : number from 0 - * to 100 representing probability text in array uses EUC-TW (CNS 11643) - * encoding - */ - int euc_tw_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, cnschars = 1; - long cnsfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Check to see if characters fit into acceptable ranges - // and have expected frequency of use - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - if (rawtext[i] < 0) { // high bit set - dbchars++; - if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i] - && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xB0 - && (byte) 0xA1 <= rawtext[i + 2] - && rawtext[i + 2] <= (byte) 0xFE - && (byte) 0xA1 <= rawtext[i + 3] - && rawtext[i + 3] <= (byte) 0xFE) { // Planes 1 - 16 - cnschars++; - // System.out.println("plane 2 or above CNS char"); - // These are all less frequent chars so just ignore freq - i += 3; - } else if ((byte) 0xA1 <= rawtext[i] - && 
rawtext[i] <= (byte) 0xFE - && // Plane 1 - (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - cnschars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - if (EUC_TWFreq[row][column] != 0) { - cnsfreq += EUC_TWFreq[row][column]; - } else if (35 <= row && row <= 92) { - cnsfreq += 150; - } - i++; - } - } - } - rangeval = 50 * ((float) cnschars / (float) dbchars); - freqval = 50 * ((float) cnsfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - /* - * Function: iso_2022_cn_probability Argument: byte array Returns : number - * from 0 to 100 representing probability text in array uses ISO 2022-CN - * encoding WORKS FOR BASIC CASES, BUT STILL NEEDS MORE WORK - */ - int iso_2022_cn_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, isochars = 1; - long isofreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Check to see if characters fit into acceptable ranges - // and have expected frequency of use - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - if (rawtext[i] == (byte) 0x1B && i + 3 < rawtextlen) { // Escape - // char ESC - if (rawtext[i + 1] == (byte) 0x24 && rawtext[i + 2] == 0x29 - && rawtext[i + 3] == (byte) 0x41) { // GB Escape $ ) A - i += 4; - while (rawtext[i] != (byte) 0x1B) { - dbchars++; - if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77) - && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77)) { - isochars++; - row = rawtext[i] - 0x21; - column = rawtext[i + 1] - 0x21; - totalfreq += 500; - if (GBFreq[row][column] != 0) { - isofreq += GBFreq[row][column]; - } else if (15 <= row && row < 55) { - isofreq += 200; - } - i++; - } - i++; - } - } else if (i + 3 < rawtextlen && rawtext[i + 1] == (byte) 0x24 - && rawtext[i + 2] == (byte) 0x29 - && rawtext[i + 3] == (byte) 0x47) { - // CNS Escape $ ) G - i += 4; - while (rawtext[i] != (byte) 0x1B) { - dbchars++; - if ((byte) 0x21 <= rawtext[i] - && 
rawtext[i] <= (byte) 0x7E - && (byte) 0x21 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0x7E) { - isochars++; - totalfreq += 500; - row = rawtext[i] - 0x21; - column = rawtext[i + 1] - 0x21; - if (EUC_TWFreq[row][column] != 0) { - isofreq += EUC_TWFreq[row][column]; - } else if (35 <= row && row <= 92) { - isofreq += 150; - } - i++; - } - i++; - } - } - if (rawtext[i] == (byte) 0x1B && i + 2 < rawtextlen - && rawtext[i + 1] == (byte) 0x28 - && rawtext[i + 2] == (byte) 0x42) { // ASCII: - // ESC - // ( B - i += 2; - } - } - } - rangeval = 50 * ((float) isochars / (float) dbchars); - freqval = 50 * ((float) isofreq / (float) totalfreq); - // System.out.println("isochars dbchars isofreq totalfreq " + isochars + - // " " + dbchars + " " + isofreq + " " + totalfreq + " - // " + rangeval + " " + freqval); - return (int) (rangeval + freqval); - // return 0; - } - - /* - * Function: utf8_probability Argument: byte array Returns : number from 0 - * to 100 representing probability text in array uses UTF-8 encoding of - * Unicode - */ - int utf8_probability(byte[] rawtext) { - int score = 0; - int i, rawtextlen = 0; - int goodbytes = 0, asciibytes = 0; - // Maybe also use UTF8 Byte Order Mark: EF BB BF - // Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen; i++) { - if ((rawtext[i] & (byte) 0x7F) == rawtext[i]) { // One byte - asciibytes++; - // Ignore ASCII, can throw off count - } else if (-64 <= rawtext[i] && rawtext[i] <= -33 - && // Two bytes - i + 1 < rawtextlen && -128 <= rawtext[i + 1] - && rawtext[i + 1] <= -65) { - goodbytes += 2; - i++; - } else if (-32 <= rawtext[i] - && rawtext[i] <= -17 - && // Three bytes - i + 2 < rawtextlen && -128 <= rawtext[i + 1] - && rawtext[i + 1] <= -65 && -128 <= rawtext[i + 2] - && rawtext[i + 2] <= -65) { - goodbytes += 3; - i += 2; - } - } - if (asciibytes == rawtextlen) { - return 0; - } - score = (int) (100 * ((float) goodbytes / (float) (rawtextlen - 
asciibytes))); - // System.out.println("rawtextlen " + rawtextlen + " goodbytes " + - // goodbytes + " asciibytes " + asciibytes + " score " + - // score); - // If not above 98, reduce to zero to prevent coincidental matches - // Allows for some (few) bad formed sequences - if (score > 98) { - return score; - } else if (score > 95 && goodbytes > 30) { - return score; - } else { - return 0; - } - } - - /* - * Function: utf16_probability Argument: byte array Returns : number from 0 - * to 100 representing probability text in array uses UTF-16 encoding of - * Unicode, guess based on BOM // NOT VERY GENERAL, NEEDS MUCH MORE WORK - */ - int utf16_probability(byte[] rawtext) { - if (rawtext.length > 1 - && ((byte) 0xFE == rawtext[0] && (byte) 0xFF == rawtext[1]) || // Big-endian - ((byte) 0xFF == rawtext[0] && (byte) 0xFE == rawtext[1])) { // Little-endian - return 100; - } - return 0; - } - - /* - * Function: ascii_probability Argument: byte array Returns : number from 0 - * to 100 representing probability text in array uses all ASCII Description: - * Sees if array has any characters not in ASCII range, if so, score is - * reduced - */ - int ascii_probability(byte[] rawtext) { - int score = 75; - int i, rawtextlen; - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen; i++) { - if (rawtext[i] < 0) { - score = score - 5; - } else if (rawtext[i] == (byte) 0x1B) { // ESC (used by ISO 2022) - score = score - 5; - } - if (score <= 0) { - return 0; - } - } - return score; - } - - /* - * Function: euc_kr__probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses EUC-KR - * encoding - */ - int euc_kr_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, krchars = 1; - long krfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) 
{ - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE - && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - krchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - if (KRFreq[row][column] != 0) { - krfreq += KRFreq[row][column]; - } else if (15 <= row && row < 55) { - krfreq += 0; - } - } - i++; - } - } - rangeval = 50 * ((float) krchars / (float) dbchars); - freqval = 50 * ((float) krfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - /* - * Function: cp949__probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses Cp949 - * encoding - */ - int cp949_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, krchars = 1; - long krfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0x81 <= rawtext[i] - && rawtext[i] <= (byte) 0xFE - && ((byte) 0x41 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0x5A - || (byte) 0x61 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0x7A || (byte) 0x81 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE)) { - krchars++; - totalfreq += 500; - if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE - && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - if (KRFreq[row][column] != 0) { - krfreq += KRFreq[row][column]; - } - } - } - i++; - } - } - rangeval = 50 * ((float) krchars / (float) dbchars); - freqval = 50 * ((float) krfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - int iso_2022_kr_probability(byte[] rawtext) { - 
int i; - for (i = 0; i < rawtext.length; i++) { - if (i + 3 < rawtext.length && rawtext[i] == 0x1b - && (char) rawtext[i + 1] == '$' - && (char) rawtext[i + 2] == ')' - && (char) rawtext[i + 3] == 'C') { - return 100; - } - } - return 0; - } - - /* - * Function: euc_jp_probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses EUC-JP - * encoding - */ - int euc_jp_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, jpchars = 1; - long jpfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column; - // Stage 1: Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE - && (byte) 0xA1 <= rawtext[i + 1] - && rawtext[i + 1] <= (byte) 0xFE) { - jpchars++; - totalfreq += 500; - row = rawtext[i] + 256 - 0xA1; - column = rawtext[i + 1] + 256 - 0xA1; - if (JPFreq[row][column] != 0) { - jpfreq += JPFreq[row][column]; - } else if (15 <= row && row < 55) { - jpfreq += 0; - } - } - i++; - } - } - rangeval = 50 * ((float) jpchars / (float) dbchars); - freqval = 50 * ((float) jpfreq / (float) totalfreq); - return (int) (rangeval + freqval); - } - - int iso_2022_jp_probability(byte[] rawtext) { - int i; - for (i = 0; i < rawtext.length; i++) { - if (i + 2 < rawtext.length && rawtext[i] == 0x1b - && (char) rawtext[i + 1] == '$' - && (char) rawtext[i + 2] == 'B') { - return 100; - } - } - return 0; - } - - /* - * Function: sjis_probability Argument: pointer to byte array Returns : - * number from 0 to 100 representing probability text in array uses - * Shift-JIS encoding - */ - int sjis_probability(byte[] rawtext) { - int i, rawtextlen = 0; - int dbchars = 1, jpchars = 1; - long jpfreq = 0, totalfreq = 1; - float rangeval = 0, freqval = 0; - int row, column, adjust; - // Stage 1: 
Check to see if characters fit into acceptable ranges - rawtextlen = rawtext.length; - for (i = 0; i < rawtextlen - 1; i++) { - // System.err.println(rawtext[i]); - if (rawtext[i] < 0) { - dbchars++; - if (i + 1 < rawtext.length - && (((byte) 0x81 <= rawtext[i] && rawtext[i] <= (byte) 0x9F) || ((byte) 0xE0 <= rawtext[i] && rawtext[i] <= (byte) 0xEF)) - && (((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E) || ((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFC))) { - jpchars++; - totalfreq += 500; - row = rawtext[i] + 256; - column = rawtext[i + 1] + 256; - if (column < 0x9f) { - adjust = 1; - } else { - adjust = 0; - } - if (row < 0xa0) { - row = ((row - 0x70) << 1) - adjust; - } else { - row = ((row - 0xb0) << 1) - adjust; - } - row -= 0x20; - column = 0x20; - // System.out.println("original row " + row + " column " + - // column); - if (row < JPFreq.length && column < JPFreq[row].length - && JPFreq[row][column] != 0) { - jpfreq += JPFreq[row][column]; - } - i++; - } else if ((byte) 0xA1 <= rawtext[i] - && rawtext[i] <= (byte) 0xDF) { - // half-width katakana, convert to full-width - } - } - } - rangeval = 50 * ((float) jpchars / (float) dbchars); - freqval = 50 * ((float) jpfreq / (float) totalfreq); - // For regular GB files, this would give the same score, so I handicap - // it slightly - return (int) (rangeval + freqval) - 1; - } - - void initialize_frequencies() { - int i, j; - for (i = 93; i >= 0; i--) { - for (j = 93; j >= 0; j--) { - GBFreq[i][j] = 0; - } - } - for (i = 125; i >= 0; i--) { - for (j = 190; j >= 0; j--) { - GBKFreq[i][j] = 0; - } - } - for (i = 93; i >= 0; i--) { - for (j = 157; j >= 0; j--) { - Big5Freq[i][j] = 0; - } - } - for (i = 125; i >= 0; i--) { - for (j = 190; j >= 0; j--) { - Big5PFreq[i][j] = 0; - } - } - for (i = 93; i >= 0; i--) { - for (j = 93; j >= 0; j--) { - EUC_TWFreq[i][j] = 0; - } - } - for (i = 93; i >= 0; i--) { - for (j = 93; j >= 0; j--) { - JPFreq[i][j] = 0; - } - } - GBFreq[20][35] = 599; 
- GBFreq[49][26] = 598; - GBFreq[41][38] = 597; - GBFreq[17][26] = 596; - GBFreq[32][42] = 595; - GBFreq[39][42] = 594; - GBFreq[45][49] = 593; - GBFreq[51][57] = 592; - GBFreq[50][47] = 591; - GBFreq[42][90] = 590; - GBFreq[52][65] = 589; - GBFreq[53][47] = 588; - GBFreq[19][82] = 587; - GBFreq[31][19] = 586; - GBFreq[40][46] = 585; - GBFreq[24][89] = 584; - GBFreq[23][85] = 583; - GBFreq[20][28] = 582; - GBFreq[42][20] = 581; - GBFreq[34][38] = 580; - GBFreq[45][9] = 579; - GBFreq[54][50] = 578; - GBFreq[25][44] = 577; - GBFreq[35][66] = 576; - GBFreq[20][55] = 575; - GBFreq[18][85] = 574; - GBFreq[20][31] = 573; - GBFreq[49][17] = 572; - GBFreq[41][16] = 571; - GBFreq[35][73] = 570; - GBFreq[20][34] = 569; - GBFreq[29][44] = 568; - GBFreq[35][38] = 567; - GBFreq[49][9] = 566; - GBFreq[46][33] = 565; - GBFreq[49][51] = 564; - GBFreq[40][89] = 563; - GBFreq[26][64] = 562; - GBFreq[54][51] = 561; - GBFreq[54][36] = 560; - GBFreq[39][4] = 559; - GBFreq[53][13] = 558; - GBFreq[24][92] = 557; - GBFreq[27][49] = 556; - GBFreq[48][6] = 555; - GBFreq[21][51] = 554; - GBFreq[30][40] = 553; - GBFreq[42][92] = 552; - GBFreq[31][78] = 551; - GBFreq[25][82] = 550; - GBFreq[47][0] = 549; - GBFreq[34][19] = 548; - GBFreq[47][35] = 547; - GBFreq[21][63] = 546; - GBFreq[43][75] = 545; - GBFreq[21][87] = 544; - GBFreq[35][59] = 543; - GBFreq[25][34] = 542; - GBFreq[21][27] = 541; - GBFreq[39][26] = 540; - GBFreq[34][26] = 539; - GBFreq[39][52] = 538; - GBFreq[50][57] = 537; - GBFreq[37][79] = 536; - GBFreq[26][24] = 535; - GBFreq[22][1] = 534; - GBFreq[18][40] = 533; - GBFreq[41][33] = 532; - GBFreq[53][26] = 531; - GBFreq[54][86] = 530; - GBFreq[20][16] = 529; - GBFreq[46][74] = 528; - GBFreq[30][19] = 527; - GBFreq[45][35] = 526; - GBFreq[45][61] = 525; - GBFreq[30][9] = 524; - GBFreq[41][53] = 523; - GBFreq[41][13] = 522; - GBFreq[50][34] = 521; - GBFreq[53][86] = 520; - GBFreq[47][47] = 519; - GBFreq[22][28] = 518; - GBFreq[50][53] = 517; - GBFreq[39][70] = 516; - 
GBFreq[38][15] = 515; - GBFreq[42][88] = 514; - GBFreq[16][29] = 513; - GBFreq[27][90] = 512; - GBFreq[29][12] = 511; - GBFreq[44][22] = 510; - GBFreq[34][69] = 509; - GBFreq[24][10] = 508; - GBFreq[44][11] = 507; - GBFreq[39][92] = 506; - GBFreq[49][48] = 505; - GBFreq[31][46] = 504; - GBFreq[19][50] = 503; - GBFreq[21][14] = 502; - GBFreq[32][28] = 501; - GBFreq[18][3] = 500; - GBFreq[53][9] = 499; - GBFreq[34][80] = 498; - GBFreq[48][88] = 497; - GBFreq[46][53] = 496; - GBFreq[22][53] = 495; - GBFreq[28][10] = 494; - GBFreq[44][65] = 493; - GBFreq[20][10] = 492; - GBFreq[40][76] = 491; - GBFreq[47][8] = 490; - GBFreq[50][74] = 489; - GBFreq[23][62] = 488; - GBFreq[49][65] = 487; - GBFreq[28][87] = 486; - GBFreq[15][48] = 485; - GBFreq[22][7] = 484; - GBFreq[19][42] = 483; - GBFreq[41][20] = 482; - GBFreq[26][55] = 481; - GBFreq[21][93] = 480; - GBFreq[31][76] = 479; - GBFreq[34][31] = 478; - GBFreq[20][66] = 477; - GBFreq[51][33] = 476; - GBFreq[34][86] = 475; - GBFreq[37][67] = 474; - GBFreq[53][53] = 473; - GBFreq[40][88] = 472; - GBFreq[39][10] = 471; - GBFreq[24][3] = 470; - GBFreq[27][25] = 469; - GBFreq[26][15] = 468; - GBFreq[21][88] = 467; - GBFreq[52][62] = 466; - GBFreq[46][81] = 465; - GBFreq[38][72] = 464; - GBFreq[17][30] = 463; - GBFreq[52][92] = 462; - GBFreq[34][90] = 461; - GBFreq[21][7] = 460; - GBFreq[36][13] = 459; - GBFreq[45][41] = 458; - GBFreq[32][5] = 457; - GBFreq[26][89] = 456; - GBFreq[23][87] = 455; - GBFreq[20][39] = 454; - GBFreq[27][23] = 453; - GBFreq[25][59] = 452; - GBFreq[49][20] = 451; - GBFreq[54][77] = 450; - GBFreq[27][67] = 449; - GBFreq[47][33] = 448; - GBFreq[41][17] = 447; - GBFreq[19][81] = 446; - GBFreq[16][66] = 445; - GBFreq[45][26] = 444; - GBFreq[49][81] = 443; - GBFreq[53][55] = 442; - GBFreq[16][26] = 441; - GBFreq[54][62] = 440; - GBFreq[20][70] = 439; - GBFreq[42][35] = 438; - GBFreq[20][57] = 437; - GBFreq[34][36] = 436; - GBFreq[46][63] = 435; - GBFreq[19][45] = 434; - GBFreq[21][10] = 433; - GBFreq[52][93] 
= 432; - GBFreq[25][2] = 431; - GBFreq[30][57] = 430; - GBFreq[41][24] = 429; - GBFreq[28][43] = 428; - GBFreq[45][86] = 427; - GBFreq[51][56] = 426; - GBFreq[37][28] = 425; - GBFreq[52][69] = 424; - GBFreq[43][92] = 423; - GBFreq[41][31] = 422; - GBFreq[37][87] = 421; - GBFreq[47][36] = 420; - GBFreq[16][16] = 419; - GBFreq[40][56] = 418; - GBFreq[24][55] = 417; - GBFreq[17][1] = 416; - GBFreq[35][57] = 415; - GBFreq[27][50] = 414; - GBFreq[26][14] = 413; - GBFreq[50][40] = 412; - GBFreq[39][19] = 411; - GBFreq[19][89] = 410; - GBFreq[29][91] = 409; - GBFreq[17][89] = 408; - GBFreq[39][74] = 407; - GBFreq[46][39] = 406; - GBFreq[40][28] = 405; - GBFreq[45][68] = 404; - GBFreq[43][10] = 403; - GBFreq[42][13] = 402; - GBFreq[44][81] = 401; - GBFreq[41][47] = 400; - GBFreq[48][58] = 399; - GBFreq[43][68] = 398; - GBFreq[16][79] = 397; - GBFreq[19][5] = 396; - GBFreq[54][59] = 395; - GBFreq[17][36] = 394; - GBFreq[18][0] = 393; - GBFreq[41][5] = 392; - GBFreq[41][72] = 391; - GBFreq[16][39] = 390; - GBFreq[54][0] = 389; - GBFreq[51][16] = 388; - GBFreq[29][36] = 387; - GBFreq[47][5] = 386; - GBFreq[47][51] = 385; - GBFreq[44][7] = 384; - GBFreq[35][30] = 383; - GBFreq[26][9] = 382; - GBFreq[16][7] = 381; - GBFreq[32][1] = 380; - GBFreq[33][76] = 379; - GBFreq[34][91] = 378; - GBFreq[52][36] = 377; - GBFreq[26][77] = 376; - GBFreq[35][48] = 375; - GBFreq[40][80] = 374; - GBFreq[41][92] = 373; - GBFreq[27][93] = 372; - GBFreq[15][17] = 371; - GBFreq[16][76] = 370; - GBFreq[51][12] = 369; - GBFreq[18][20] = 368; - GBFreq[15][54] = 367; - GBFreq[50][5] = 366; - GBFreq[33][22] = 365; - GBFreq[37][57] = 364; - GBFreq[28][47] = 363; - GBFreq[42][31] = 362; - GBFreq[18][2] = 361; - GBFreq[43][64] = 360; - GBFreq[23][47] = 359; - GBFreq[28][79] = 358; - GBFreq[25][45] = 357; - GBFreq[23][91] = 356; - GBFreq[22][19] = 355; - GBFreq[25][46] = 354; - GBFreq[22][36] = 353; - GBFreq[54][85] = 352; - GBFreq[46][20] = 351; - GBFreq[27][37] = 350; - GBFreq[26][81] = 349; - 
GBFreq[42][29] = 348; - GBFreq[31][90] = 347; - GBFreq[41][59] = 346; - GBFreq[24][65] = 345; - GBFreq[44][84] = 344; - GBFreq[24][90] = 343; - GBFreq[38][54] = 342; - GBFreq[28][70] = 341; - GBFreq[27][15] = 340; - GBFreq[28][80] = 339; - GBFreq[29][8] = 338; - GBFreq[45][80] = 337; - GBFreq[53][37] = 336; - GBFreq[28][65] = 335; - GBFreq[23][86] = 334; - GBFreq[39][45] = 333; - GBFreq[53][32] = 332; - GBFreq[38][68] = 331; - GBFreq[45][78] = 330; - GBFreq[43][7] = 329; - GBFreq[46][82] = 328; - GBFreq[27][38] = 327; - GBFreq[16][62] = 326; - GBFreq[24][17] = 325; - GBFreq[22][70] = 324; - GBFreq[52][28] = 323; - GBFreq[23][40] = 322; - GBFreq[28][50] = 321; - GBFreq[42][91] = 320; - GBFreq[47][76] = 319; - GBFreq[15][42] = 318; - GBFreq[43][55] = 317; - GBFreq[29][84] = 316; - GBFreq[44][90] = 315; - GBFreq[53][16] = 314; - GBFreq[22][93] = 313; - GBFreq[34][10] = 312; - GBFreq[32][53] = 311; - GBFreq[43][65] = 310; - GBFreq[28][7] = 309; - GBFreq[35][46] = 308; - GBFreq[21][39] = 307; - GBFreq[44][18] = 306; - GBFreq[40][10] = 305; - GBFreq[54][53] = 304; - GBFreq[38][74] = 303; - GBFreq[28][26] = 302; - GBFreq[15][13] = 301; - GBFreq[39][34] = 300; - GBFreq[39][46] = 299; - GBFreq[42][66] = 298; - GBFreq[33][58] = 297; - GBFreq[15][56] = 296; - GBFreq[18][51] = 295; - GBFreq[49][68] = 294; - GBFreq[30][37] = 293; - GBFreq[51][84] = 292; - GBFreq[51][9] = 291; - GBFreq[40][70] = 290; - GBFreq[41][84] = 289; - GBFreq[28][64] = 288; - GBFreq[32][88] = 287; - GBFreq[24][5] = 286; - GBFreq[53][23] = 285; - GBFreq[42][27] = 284; - GBFreq[22][38] = 283; - GBFreq[32][86] = 282; - GBFreq[34][30] = 281; - GBFreq[38][63] = 280; - GBFreq[24][59] = 279; - GBFreq[22][81] = 278; - GBFreq[32][11] = 277; - GBFreq[51][21] = 276; - GBFreq[54][41] = 275; - GBFreq[21][50] = 274; - GBFreq[23][89] = 273; - GBFreq[19][87] = 272; - GBFreq[26][7] = 271; - GBFreq[30][75] = 270; - GBFreq[43][84] = 269; - GBFreq[51][25] = 268; - GBFreq[16][67] = 267; - GBFreq[32][9] = 266; - GBFreq[48][51] 
= 265; - GBFreq[39][7] = 264; - GBFreq[44][88] = 263; - GBFreq[52][24] = 262; - GBFreq[23][34] = 261; - GBFreq[32][75] = 260; - GBFreq[19][10] = 259; - GBFreq[28][91] = 258; - GBFreq[32][83] = 257; - GBFreq[25][75] = 256; - GBFreq[53][45] = 255; - GBFreq[29][85] = 254; - GBFreq[53][59] = 253; - GBFreq[16][2] = 252; - GBFreq[19][78] = 251; - GBFreq[15][75] = 250; - GBFreq[51][42] = 249; - GBFreq[45][67] = 248; - GBFreq[15][74] = 247; - GBFreq[25][81] = 246; - GBFreq[37][62] = 245; - GBFreq[16][55] = 244; - GBFreq[18][38] = 243; - GBFreq[23][23] = 242; - GBFreq[38][30] = 241; - GBFreq[17][28] = 240; - GBFreq[44][73] = 239; - GBFreq[23][78] = 238; - GBFreq[40][77] = 237; - GBFreq[38][87] = 236; - GBFreq[27][19] = 235; - GBFreq[38][82] = 234; - GBFreq[37][22] = 233; - GBFreq[41][30] = 232; - GBFreq[54][9] = 231; - GBFreq[32][30] = 230; - GBFreq[30][52] = 229; - GBFreq[40][84] = 228; - GBFreq[53][57] = 227; - GBFreq[27][27] = 226; - GBFreq[38][64] = 225; - GBFreq[18][43] = 224; - GBFreq[23][69] = 223; - GBFreq[28][12] = 222; - GBFreq[50][78] = 221; - GBFreq[50][1] = 220; - GBFreq[26][88] = 219; - GBFreq[36][40] = 218; - GBFreq[33][89] = 217; - GBFreq[41][28] = 216; - GBFreq[31][77] = 215; - GBFreq[46][1] = 214; - GBFreq[47][19] = 213; - GBFreq[35][55] = 212; - GBFreq[41][21] = 211; - GBFreq[27][10] = 210; - GBFreq[32][77] = 209; - GBFreq[26][37] = 208; - GBFreq[20][33] = 207; - GBFreq[41][52] = 206; - GBFreq[32][18] = 205; - GBFreq[38][13] = 204; - GBFreq[20][18] = 203; - GBFreq[20][24] = 202; - GBFreq[45][19] = 201; - GBFreq[18][53] = 200; - - Big5Freq[9][89] = 600; - Big5Freq[11][15] = 599; - Big5Freq[3][66] = 598; - Big5Freq[6][121] = 597; - Big5Freq[3][0] = 596; - Big5Freq[5][82] = 595; - Big5Freq[3][42] = 594; - Big5Freq[5][34] = 593; - Big5Freq[3][8] = 592; - Big5Freq[3][6] = 591; - Big5Freq[3][67] = 590; - Big5Freq[7][139] = 589; - Big5Freq[23][137] = 588; - Big5Freq[12][46] = 587; - Big5Freq[4][8] = 586; - Big5Freq[4][41] = 585; - Big5Freq[18][47] = 584; - 
Big5Freq[12][114] = 583; - Big5Freq[6][1] = 582; - Big5Freq[22][60] = 581; - Big5Freq[5][46] = 580; - Big5Freq[11][79] = 579; - Big5Freq[3][23] = 578; - Big5Freq[7][114] = 577; - Big5Freq[29][102] = 576; - Big5Freq[19][14] = 575; - Big5Freq[4][133] = 574; - Big5Freq[3][29] = 573; - Big5Freq[4][109] = 572; - Big5Freq[14][127] = 571; - Big5Freq[5][48] = 570; - Big5Freq[13][104] = 569; - Big5Freq[3][132] = 568; - Big5Freq[26][64] = 567; - Big5Freq[7][19] = 566; - Big5Freq[4][12] = 565; - Big5Freq[11][124] = 564; - Big5Freq[7][89] = 563; - Big5Freq[15][124] = 562; - Big5Freq[4][108] = 561; - Big5Freq[19][66] = 560; - Big5Freq[3][21] = 559; - Big5Freq[24][12] = 558; - Big5Freq[28][111] = 557; - Big5Freq[12][107] = 556; - Big5Freq[3][112] = 555; - Big5Freq[8][113] = 554; - Big5Freq[5][40] = 553; - Big5Freq[26][145] = 552; - Big5Freq[3][48] = 551; - Big5Freq[3][70] = 550; - Big5Freq[22][17] = 549; - Big5Freq[16][47] = 548; - Big5Freq[3][53] = 547; - Big5Freq[4][24] = 546; - Big5Freq[32][120] = 545; - Big5Freq[24][49] = 544; - Big5Freq[24][142] = 543; - Big5Freq[18][66] = 542; - Big5Freq[29][150] = 541; - Big5Freq[5][122] = 540; - Big5Freq[5][114] = 539; - Big5Freq[3][44] = 538; - Big5Freq[10][128] = 537; - Big5Freq[15][20] = 536; - Big5Freq[13][33] = 535; - Big5Freq[14][87] = 534; - Big5Freq[3][126] = 533; - Big5Freq[4][53] = 532; - Big5Freq[4][40] = 531; - Big5Freq[9][93] = 530; - Big5Freq[15][137] = 529; - Big5Freq[10][123] = 528; - Big5Freq[4][56] = 527; - Big5Freq[5][71] = 526; - Big5Freq[10][8] = 525; - Big5Freq[5][16] = 524; - Big5Freq[5][146] = 523; - Big5Freq[18][88] = 522; - Big5Freq[24][4] = 521; - Big5Freq[20][47] = 520; - Big5Freq[5][33] = 519; - Big5Freq[9][43] = 518; - Big5Freq[20][12] = 517; - Big5Freq[20][13] = 516; - Big5Freq[5][156] = 515; - Big5Freq[22][140] = 514; - Big5Freq[8][146] = 513; - Big5Freq[21][123] = 512; - Big5Freq[4][90] = 511; - Big5Freq[5][62] = 510; - Big5Freq[17][59] = 509; - Big5Freq[10][37] = 508; - Big5Freq[18][107] = 507; - 
Big5Freq[14][53] = 506; - Big5Freq[22][51] = 505; - Big5Freq[8][13] = 504; - Big5Freq[5][29] = 503; - Big5Freq[9][7] = 502; - Big5Freq[22][14] = 501; - Big5Freq[8][55] = 500; - Big5Freq[33][9] = 499; - Big5Freq[16][64] = 498; - Big5Freq[7][131] = 497; - Big5Freq[34][4] = 496; - Big5Freq[7][101] = 495; - Big5Freq[11][139] = 494; - Big5Freq[3][135] = 493; - Big5Freq[7][102] = 492; - Big5Freq[17][13] = 491; - Big5Freq[3][20] = 490; - Big5Freq[27][106] = 489; - Big5Freq[5][88] = 488; - Big5Freq[6][33] = 487; - Big5Freq[5][139] = 486; - Big5Freq[6][0] = 485; - Big5Freq[17][58] = 484; - Big5Freq[5][133] = 483; - Big5Freq[9][107] = 482; - Big5Freq[23][39] = 481; - Big5Freq[5][23] = 480; - Big5Freq[3][79] = 479; - Big5Freq[32][97] = 478; - Big5Freq[3][136] = 477; - Big5Freq[4][94] = 476; - Big5Freq[21][61] = 475; - Big5Freq[23][123] = 474; - Big5Freq[26][16] = 473; - Big5Freq[24][137] = 472; - Big5Freq[22][18] = 471; - Big5Freq[5][1] = 470; - Big5Freq[20][119] = 469; - Big5Freq[3][7] = 468; - Big5Freq[10][79] = 467; - Big5Freq[15][105] = 466; - Big5Freq[3][144] = 465; - Big5Freq[12][80] = 464; - Big5Freq[15][73] = 463; - Big5Freq[3][19] = 462; - Big5Freq[8][109] = 461; - Big5Freq[3][15] = 460; - Big5Freq[31][82] = 459; - Big5Freq[3][43] = 458; - Big5Freq[25][119] = 457; - Big5Freq[16][111] = 456; - Big5Freq[7][77] = 455; - Big5Freq[3][95] = 454; - Big5Freq[24][82] = 453; - Big5Freq[7][52] = 452; - Big5Freq[9][151] = 451; - Big5Freq[3][129] = 450; - Big5Freq[5][87] = 449; - Big5Freq[3][55] = 448; - Big5Freq[8][153] = 447; - Big5Freq[4][83] = 446; - Big5Freq[3][114] = 445; - Big5Freq[23][147] = 444; - Big5Freq[15][31] = 443; - Big5Freq[3][54] = 442; - Big5Freq[11][122] = 441; - Big5Freq[4][4] = 440; - Big5Freq[34][149] = 439; - Big5Freq[3][17] = 438; - Big5Freq[21][64] = 437; - Big5Freq[26][144] = 436; - Big5Freq[4][62] = 435; - Big5Freq[8][15] = 434; - Big5Freq[35][80] = 433; - Big5Freq[7][110] = 432; - Big5Freq[23][114] = 431; - Big5Freq[3][108] = 430; - Big5Freq[3][62] = 
429; - Big5Freq[21][41] = 428; - Big5Freq[15][99] = 427; - Big5Freq[5][47] = 426; - Big5Freq[4][96] = 425; - Big5Freq[20][122] = 424; - Big5Freq[5][21] = 423; - Big5Freq[4][157] = 422; - Big5Freq[16][14] = 421; - Big5Freq[3][117] = 420; - Big5Freq[7][129] = 419; - Big5Freq[4][27] = 418; - Big5Freq[5][30] = 417; - Big5Freq[22][16] = 416; - Big5Freq[5][64] = 415; - Big5Freq[17][99] = 414; - Big5Freq[17][57] = 413; - Big5Freq[8][105] = 412; - Big5Freq[5][112] = 411; - Big5Freq[20][59] = 410; - Big5Freq[6][129] = 409; - Big5Freq[18][17] = 408; - Big5Freq[3][92] = 407; - Big5Freq[28][118] = 406; - Big5Freq[3][109] = 405; - Big5Freq[31][51] = 404; - Big5Freq[13][116] = 403; - Big5Freq[6][15] = 402; - Big5Freq[36][136] = 401; - Big5Freq[12][74] = 400; - Big5Freq[20][88] = 399; - Big5Freq[36][68] = 398; - Big5Freq[3][147] = 397; - Big5Freq[15][84] = 396; - Big5Freq[16][32] = 395; - Big5Freq[16][58] = 394; - Big5Freq[7][66] = 393; - Big5Freq[23][107] = 392; - Big5Freq[9][6] = 391; - Big5Freq[12][86] = 390; - Big5Freq[23][112] = 389; - Big5Freq[37][23] = 388; - Big5Freq[3][138] = 387; - Big5Freq[20][68] = 386; - Big5Freq[15][116] = 385; - Big5Freq[18][64] = 384; - Big5Freq[12][139] = 383; - Big5Freq[11][155] = 382; - Big5Freq[4][156] = 381; - Big5Freq[12][84] = 380; - Big5Freq[18][49] = 379; - Big5Freq[25][125] = 378; - Big5Freq[25][147] = 377; - Big5Freq[15][110] = 376; - Big5Freq[19][96] = 375; - Big5Freq[30][152] = 374; - Big5Freq[6][31] = 373; - Big5Freq[27][117] = 372; - Big5Freq[3][10] = 371; - Big5Freq[6][131] = 370; - Big5Freq[13][112] = 369; - Big5Freq[36][156] = 368; - Big5Freq[4][60] = 367; - Big5Freq[15][121] = 366; - Big5Freq[4][112] = 365; - Big5Freq[30][142] = 364; - Big5Freq[23][154] = 363; - Big5Freq[27][101] = 362; - Big5Freq[9][140] = 361; - Big5Freq[3][89] = 360; - Big5Freq[18][148] = 359; - Big5Freq[4][69] = 358; - Big5Freq[16][49] = 357; - Big5Freq[6][117] = 356; - Big5Freq[36][55] = 355; - Big5Freq[5][123] = 354; - Big5Freq[4][126] = 353; - 
Big5Freq[4][119] = 352; - Big5Freq[9][95] = 351; - Big5Freq[5][24] = 350; - Big5Freq[16][133] = 349; - Big5Freq[10][134] = 348; - Big5Freq[26][59] = 347; - Big5Freq[6][41] = 346; - Big5Freq[6][146] = 345; - Big5Freq[19][24] = 344; - Big5Freq[5][113] = 343; - Big5Freq[10][118] = 342; - Big5Freq[34][151] = 341; - Big5Freq[9][72] = 340; - Big5Freq[31][25] = 339; - Big5Freq[18][126] = 338; - Big5Freq[18][28] = 337; - Big5Freq[4][153] = 336; - Big5Freq[3][84] = 335; - Big5Freq[21][18] = 334; - Big5Freq[25][129] = 333; - Big5Freq[6][107] = 332; - Big5Freq[12][25] = 331; - Big5Freq[17][109] = 330; - Big5Freq[7][76] = 329; - Big5Freq[15][15] = 328; - Big5Freq[4][14] = 327; - Big5Freq[23][88] = 326; - Big5Freq[18][2] = 325; - Big5Freq[6][88] = 324; - Big5Freq[16][84] = 323; - Big5Freq[12][48] = 322; - Big5Freq[7][68] = 321; - Big5Freq[5][50] = 320; - Big5Freq[13][54] = 319; - Big5Freq[7][98] = 318; - Big5Freq[11][6] = 317; - Big5Freq[9][80] = 316; - Big5Freq[16][41] = 315; - Big5Freq[7][43] = 314; - Big5Freq[28][117] = 313; - Big5Freq[3][51] = 312; - Big5Freq[7][3] = 311; - Big5Freq[20][81] = 310; - Big5Freq[4][2] = 309; - Big5Freq[11][16] = 308; - Big5Freq[10][4] = 307; - Big5Freq[10][119] = 306; - Big5Freq[6][142] = 305; - Big5Freq[18][51] = 304; - Big5Freq[8][144] = 303; - Big5Freq[10][65] = 302; - Big5Freq[11][64] = 301; - Big5Freq[11][130] = 300; - Big5Freq[9][92] = 299; - Big5Freq[18][29] = 298; - Big5Freq[18][78] = 297; - Big5Freq[18][151] = 296; - Big5Freq[33][127] = 295; - Big5Freq[35][113] = 294; - Big5Freq[10][155] = 293; - Big5Freq[3][76] = 292; - Big5Freq[36][123] = 291; - Big5Freq[13][143] = 290; - Big5Freq[5][135] = 289; - Big5Freq[23][116] = 288; - Big5Freq[6][101] = 287; - Big5Freq[14][74] = 286; - Big5Freq[7][153] = 285; - Big5Freq[3][101] = 284; - Big5Freq[9][74] = 283; - Big5Freq[3][156] = 282; - Big5Freq[4][147] = 281; - Big5Freq[9][12] = 280; - Big5Freq[18][133] = 279; - Big5Freq[4][0] = 278; - Big5Freq[7][155] = 277; - Big5Freq[9][144] = 276; - 
Big5Freq[23][49] = 275; - Big5Freq[5][89] = 274; - Big5Freq[10][11] = 273; - Big5Freq[3][110] = 272; - Big5Freq[3][40] = 271; - Big5Freq[29][115] = 270; - Big5Freq[9][100] = 269; - Big5Freq[21][67] = 268; - Big5Freq[23][145] = 267; - Big5Freq[10][47] = 266; - Big5Freq[4][31] = 265; - Big5Freq[4][81] = 264; - Big5Freq[22][62] = 263; - Big5Freq[4][28] = 262; - Big5Freq[27][39] = 261; - Big5Freq[27][54] = 260; - Big5Freq[32][46] = 259; - Big5Freq[4][76] = 258; - Big5Freq[26][15] = 257; - Big5Freq[12][154] = 256; - Big5Freq[9][150] = 255; - Big5Freq[15][17] = 254; - Big5Freq[5][129] = 253; - Big5Freq[10][40] = 252; - Big5Freq[13][37] = 251; - Big5Freq[31][104] = 250; - Big5Freq[3][152] = 249; - Big5Freq[5][22] = 248; - Big5Freq[8][48] = 247; - Big5Freq[4][74] = 246; - Big5Freq[6][17] = 245; - Big5Freq[30][82] = 244; - Big5Freq[4][116] = 243; - Big5Freq[16][42] = 242; - Big5Freq[5][55] = 241; - Big5Freq[4][64] = 240; - Big5Freq[14][19] = 239; - Big5Freq[35][82] = 238; - Big5Freq[30][139] = 237; - Big5Freq[26][152] = 236; - Big5Freq[32][32] = 235; - Big5Freq[21][102] = 234; - Big5Freq[10][131] = 233; - Big5Freq[9][128] = 232; - Big5Freq[3][87] = 231; - Big5Freq[4][51] = 230; - Big5Freq[10][15] = 229; - Big5Freq[4][150] = 228; - Big5Freq[7][4] = 227; - Big5Freq[7][51] = 226; - Big5Freq[7][157] = 225; - Big5Freq[4][146] = 224; - Big5Freq[4][91] = 223; - Big5Freq[7][13] = 222; - Big5Freq[17][116] = 221; - Big5Freq[23][21] = 220; - Big5Freq[5][106] = 219; - Big5Freq[14][100] = 218; - Big5Freq[10][152] = 217; - Big5Freq[14][89] = 216; - Big5Freq[6][138] = 215; - Big5Freq[12][157] = 214; - Big5Freq[10][102] = 213; - Big5Freq[19][94] = 212; - Big5Freq[7][74] = 211; - Big5Freq[18][128] = 210; - Big5Freq[27][111] = 209; - Big5Freq[11][57] = 208; - Big5Freq[3][131] = 207; - Big5Freq[30][23] = 206; - Big5Freq[30][126] = 205; - Big5Freq[4][36] = 204; - Big5Freq[26][124] = 203; - Big5Freq[4][19] = 202; - Big5Freq[9][152] = 201; - - Big5PFreq[41][122] = 600; - Big5PFreq[35][0] = 599; 
- Big5PFreq[43][15] = 598; - Big5PFreq[35][99] = 597; - Big5PFreq[35][6] = 596; - Big5PFreq[35][8] = 595; - Big5PFreq[38][154] = 594; - Big5PFreq[37][34] = 593; - Big5PFreq[37][115] = 592; - Big5PFreq[36][12] = 591; - Big5PFreq[18][77] = 590; - Big5PFreq[35][100] = 589; - Big5PFreq[35][42] = 588; - Big5PFreq[120][75] = 587; - Big5PFreq[35][23] = 586; - Big5PFreq[13][72] = 585; - Big5PFreq[0][67] = 584; - Big5PFreq[39][172] = 583; - Big5PFreq[22][182] = 582; - Big5PFreq[15][186] = 581; - Big5PFreq[15][165] = 580; - Big5PFreq[35][44] = 579; - Big5PFreq[40][13] = 578; - Big5PFreq[38][1] = 577; - Big5PFreq[37][33] = 576; - Big5PFreq[36][24] = 575; - Big5PFreq[56][4] = 574; - Big5PFreq[35][29] = 573; - Big5PFreq[9][96] = 572; - Big5PFreq[37][62] = 571; - Big5PFreq[48][47] = 570; - Big5PFreq[51][14] = 569; - Big5PFreq[39][122] = 568; - Big5PFreq[44][46] = 567; - Big5PFreq[35][21] = 566; - Big5PFreq[36][8] = 565; - Big5PFreq[36][141] = 564; - Big5PFreq[3][81] = 563; - Big5PFreq[37][155] = 562; - Big5PFreq[42][84] = 561; - Big5PFreq[36][40] = 560; - Big5PFreq[35][103] = 559; - Big5PFreq[11][84] = 558; - Big5PFreq[45][33] = 557; - Big5PFreq[121][79] = 556; - Big5PFreq[2][77] = 555; - Big5PFreq[36][41] = 554; - Big5PFreq[37][47] = 553; - Big5PFreq[39][125] = 552; - Big5PFreq[37][26] = 551; - Big5PFreq[35][48] = 550; - Big5PFreq[35][28] = 549; - Big5PFreq[35][159] = 548; - Big5PFreq[37][40] = 547; - Big5PFreq[35][145] = 546; - Big5PFreq[37][147] = 545; - Big5PFreq[46][160] = 544; - Big5PFreq[37][46] = 543; - Big5PFreq[50][99] = 542; - Big5PFreq[52][13] = 541; - Big5PFreq[10][82] = 540; - Big5PFreq[35][169] = 539; - Big5PFreq[35][31] = 538; - Big5PFreq[47][31] = 537; - Big5PFreq[18][79] = 536; - Big5PFreq[16][113] = 535; - Big5PFreq[37][104] = 534; - Big5PFreq[39][134] = 533; - Big5PFreq[36][53] = 532; - Big5PFreq[38][0] = 531; - Big5PFreq[4][86] = 530; - Big5PFreq[54][17] = 529; - Big5PFreq[43][157] = 528; - Big5PFreq[35][165] = 527; - Big5PFreq[69][147] = 526; - 
Big5PFreq[117][95] = 525; - Big5PFreq[35][162] = 524; - Big5PFreq[35][17] = 523; - Big5PFreq[36][142] = 522; - Big5PFreq[36][4] = 521; - Big5PFreq[37][166] = 520; - Big5PFreq[35][168] = 519; - Big5PFreq[35][19] = 518; - Big5PFreq[37][48] = 517; - Big5PFreq[42][37] = 516; - Big5PFreq[40][146] = 515; - Big5PFreq[36][123] = 514; - Big5PFreq[22][41] = 513; - Big5PFreq[20][119] = 512; - Big5PFreq[2][74] = 511; - Big5PFreq[44][113] = 510; - Big5PFreq[35][125] = 509; - Big5PFreq[37][16] = 508; - Big5PFreq[35][20] = 507; - Big5PFreq[35][55] = 506; - Big5PFreq[37][145] = 505; - Big5PFreq[0][88] = 504; - Big5PFreq[3][94] = 503; - Big5PFreq[6][65] = 502; - Big5PFreq[26][15] = 501; - Big5PFreq[41][126] = 500; - Big5PFreq[36][129] = 499; - Big5PFreq[31][75] = 498; - Big5PFreq[19][61] = 497; - Big5PFreq[35][128] = 496; - Big5PFreq[29][79] = 495; - Big5PFreq[36][62] = 494; - Big5PFreq[37][189] = 493; - Big5PFreq[39][109] = 492; - Big5PFreq[39][135] = 491; - Big5PFreq[72][15] = 490; - Big5PFreq[47][106] = 489; - Big5PFreq[54][14] = 488; - Big5PFreq[24][52] = 487; - Big5PFreq[38][162] = 486; - Big5PFreq[41][43] = 485; - Big5PFreq[37][121] = 484; - Big5PFreq[14][66] = 483; - Big5PFreq[37][30] = 482; - Big5PFreq[35][7] = 481; - Big5PFreq[49][58] = 480; - Big5PFreq[43][188] = 479; - Big5PFreq[24][66] = 478; - Big5PFreq[35][171] = 477; - Big5PFreq[40][186] = 476; - Big5PFreq[39][164] = 475; - Big5PFreq[78][186] = 474; - Big5PFreq[8][72] = 473; - Big5PFreq[36][190] = 472; - Big5PFreq[35][53] = 471; - Big5PFreq[35][54] = 470; - Big5PFreq[22][159] = 469; - Big5PFreq[35][9] = 468; - Big5PFreq[41][140] = 467; - Big5PFreq[37][22] = 466; - Big5PFreq[48][97] = 465; - Big5PFreq[50][97] = 464; - Big5PFreq[36][127] = 463; - Big5PFreq[37][23] = 462; - Big5PFreq[40][55] = 461; - Big5PFreq[35][43] = 460; - Big5PFreq[26][22] = 459; - Big5PFreq[35][15] = 458; - Big5PFreq[72][179] = 457; - Big5PFreq[20][129] = 456; - Big5PFreq[52][101] = 455; - Big5PFreq[35][12] = 454; - Big5PFreq[42][156] = 453; - 
Big5PFreq[15][157] = 452; - Big5PFreq[50][140] = 451; - Big5PFreq[26][28] = 450; - Big5PFreq[54][51] = 449; - Big5PFreq[35][112] = 448; - Big5PFreq[36][116] = 447; - Big5PFreq[42][11] = 446; - Big5PFreq[37][172] = 445; - Big5PFreq[37][29] = 444; - Big5PFreq[44][107] = 443; - Big5PFreq[50][17] = 442; - Big5PFreq[39][107] = 441; - Big5PFreq[19][109] = 440; - Big5PFreq[36][60] = 439; - Big5PFreq[49][132] = 438; - Big5PFreq[26][16] = 437; - Big5PFreq[43][155] = 436; - Big5PFreq[37][120] = 435; - Big5PFreq[15][159] = 434; - Big5PFreq[43][6] = 433; - Big5PFreq[45][188] = 432; - Big5PFreq[35][38] = 431; - Big5PFreq[39][143] = 430; - Big5PFreq[48][144] = 429; - Big5PFreq[37][168] = 428; - Big5PFreq[37][1] = 427; - Big5PFreq[36][109] = 426; - Big5PFreq[46][53] = 425; - Big5PFreq[38][54] = 424; - Big5PFreq[36][0] = 423; - Big5PFreq[72][33] = 422; - Big5PFreq[42][8] = 421; - Big5PFreq[36][31] = 420; - Big5PFreq[35][150] = 419; - Big5PFreq[118][93] = 418; - Big5PFreq[37][61] = 417; - Big5PFreq[0][85] = 416; - Big5PFreq[36][27] = 415; - Big5PFreq[35][134] = 414; - Big5PFreq[36][145] = 413; - Big5PFreq[6][96] = 412; - Big5PFreq[36][14] = 411; - Big5PFreq[16][36] = 410; - Big5PFreq[15][175] = 409; - Big5PFreq[35][10] = 408; - Big5PFreq[36][189] = 407; - Big5PFreq[35][51] = 406; - Big5PFreq[35][109] = 405; - Big5PFreq[35][147] = 404; - Big5PFreq[35][180] = 403; - Big5PFreq[72][5] = 402; - Big5PFreq[36][107] = 401; - Big5PFreq[49][116] = 400; - Big5PFreq[73][30] = 399; - Big5PFreq[6][90] = 398; - Big5PFreq[2][70] = 397; - Big5PFreq[17][141] = 396; - Big5PFreq[35][62] = 395; - Big5PFreq[16][180] = 394; - Big5PFreq[4][91] = 393; - Big5PFreq[15][171] = 392; - Big5PFreq[35][177] = 391; - Big5PFreq[37][173] = 390; - Big5PFreq[16][121] = 389; - Big5PFreq[35][5] = 388; - Big5PFreq[46][122] = 387; - Big5PFreq[40][138] = 386; - Big5PFreq[50][49] = 385; - Big5PFreq[36][152] = 384; - Big5PFreq[13][43] = 383; - Big5PFreq[9][88] = 382; - Big5PFreq[36][159] = 381; - Big5PFreq[27][62] = 380; - 
Big5PFreq[40][18] = 379; - Big5PFreq[17][129] = 378; - Big5PFreq[43][97] = 377; - Big5PFreq[13][131] = 376; - Big5PFreq[46][107] = 375; - Big5PFreq[60][64] = 374; - Big5PFreq[36][179] = 373; - Big5PFreq[37][55] = 372; - Big5PFreq[41][173] = 371; - Big5PFreq[44][172] = 370; - Big5PFreq[23][187] = 369; - Big5PFreq[36][149] = 368; - Big5PFreq[17][125] = 367; - Big5PFreq[55][180] = 366; - Big5PFreq[51][129] = 365; - Big5PFreq[36][51] = 364; - Big5PFreq[37][122] = 363; - Big5PFreq[48][32] = 362; - Big5PFreq[51][99] = 361; - Big5PFreq[54][16] = 360; - Big5PFreq[41][183] = 359; - Big5PFreq[37][179] = 358; - Big5PFreq[38][179] = 357; - Big5PFreq[35][143] = 356; - Big5PFreq[37][24] = 355; - Big5PFreq[40][177] = 354; - Big5PFreq[47][117] = 353; - Big5PFreq[39][52] = 352; - Big5PFreq[22][99] = 351; - Big5PFreq[40][142] = 350; - Big5PFreq[36][49] = 349; - Big5PFreq[38][17] = 348; - Big5PFreq[39][188] = 347; - Big5PFreq[36][186] = 346; - Big5PFreq[35][189] = 345; - Big5PFreq[41][7] = 344; - Big5PFreq[18][91] = 343; - Big5PFreq[43][137] = 342; - Big5PFreq[35][142] = 341; - Big5PFreq[35][117] = 340; - Big5PFreq[39][138] = 339; - Big5PFreq[16][59] = 338; - Big5PFreq[39][174] = 337; - Big5PFreq[55][145] = 336; - Big5PFreq[37][21] = 335; - Big5PFreq[36][180] = 334; - Big5PFreq[37][156] = 333; - Big5PFreq[49][13] = 332; - Big5PFreq[41][107] = 331; - Big5PFreq[36][56] = 330; - Big5PFreq[53][8] = 329; - Big5PFreq[22][114] = 328; - Big5PFreq[5][95] = 327; - Big5PFreq[37][0] = 326; - Big5PFreq[26][183] = 325; - Big5PFreq[22][66] = 324; - Big5PFreq[35][58] = 323; - Big5PFreq[48][117] = 322; - Big5PFreq[36][102] = 321; - Big5PFreq[22][122] = 320; - Big5PFreq[35][11] = 319; - Big5PFreq[46][19] = 318; - Big5PFreq[22][49] = 317; - Big5PFreq[48][166] = 316; - Big5PFreq[41][125] = 315; - Big5PFreq[41][1] = 314; - Big5PFreq[35][178] = 313; - Big5PFreq[41][12] = 312; - Big5PFreq[26][167] = 311; - Big5PFreq[42][152] = 310; - Big5PFreq[42][46] = 309; - Big5PFreq[42][151] = 308; - Big5PFreq[20][135] 
= 307; - Big5PFreq[37][162] = 306; - Big5PFreq[37][50] = 305; - Big5PFreq[22][185] = 304; - Big5PFreq[36][166] = 303; - Big5PFreq[19][40] = 302; - Big5PFreq[22][107] = 301; - Big5PFreq[22][102] = 300; - Big5PFreq[57][162] = 299; - Big5PFreq[22][124] = 298; - Big5PFreq[37][138] = 297; - Big5PFreq[37][25] = 296; - Big5PFreq[0][69] = 295; - Big5PFreq[43][172] = 294; - Big5PFreq[42][167] = 293; - Big5PFreq[35][120] = 292; - Big5PFreq[41][128] = 291; - Big5PFreq[2][88] = 290; - Big5PFreq[20][123] = 289; - Big5PFreq[35][123] = 288; - Big5PFreq[36][28] = 287; - Big5PFreq[42][188] = 286; - Big5PFreq[42][164] = 285; - Big5PFreq[42][4] = 284; - Big5PFreq[43][57] = 283; - Big5PFreq[39][3] = 282; - Big5PFreq[42][3] = 281; - Big5PFreq[57][158] = 280; - Big5PFreq[35][146] = 279; - Big5PFreq[24][54] = 278; - Big5PFreq[13][110] = 277; - Big5PFreq[23][132] = 276; - Big5PFreq[26][102] = 275; - Big5PFreq[55][178] = 274; - Big5PFreq[17][117] = 273; - Big5PFreq[41][161] = 272; - Big5PFreq[38][150] = 271; - Big5PFreq[10][71] = 270; - Big5PFreq[47][60] = 269; - Big5PFreq[16][114] = 268; - Big5PFreq[21][47] = 267; - Big5PFreq[39][101] = 266; - Big5PFreq[18][45] = 265; - Big5PFreq[40][121] = 264; - Big5PFreq[45][41] = 263; - Big5PFreq[22][167] = 262; - Big5PFreq[26][149] = 261; - Big5PFreq[15][189] = 260; - Big5PFreq[41][177] = 259; - Big5PFreq[46][36] = 258; - Big5PFreq[20][40] = 257; - Big5PFreq[41][54] = 256; - Big5PFreq[3][87] = 255; - Big5PFreq[40][16] = 254; - Big5PFreq[42][15] = 253; - Big5PFreq[11][83] = 252; - Big5PFreq[0][94] = 251; - Big5PFreq[122][81] = 250; - Big5PFreq[41][26] = 249; - Big5PFreq[36][34] = 248; - Big5PFreq[44][148] = 247; - Big5PFreq[35][3] = 246; - Big5PFreq[36][114] = 245; - Big5PFreq[42][112] = 244; - Big5PFreq[35][183] = 243; - Big5PFreq[49][73] = 242; - Big5PFreq[39][2] = 241; - Big5PFreq[38][121] = 240; - Big5PFreq[44][114] = 239; - Big5PFreq[49][32] = 238; - Big5PFreq[1][65] = 237; - Big5PFreq[38][25] = 236; - Big5PFreq[39][4] = 235; - Big5PFreq[42][62] 
= 234; - Big5PFreq[35][40] = 233; - Big5PFreq[24][2] = 232; - Big5PFreq[53][49] = 231; - Big5PFreq[41][133] = 230; - Big5PFreq[43][134] = 229; - Big5PFreq[3][83] = 228; - Big5PFreq[38][158] = 227; - Big5PFreq[24][17] = 226; - Big5PFreq[52][59] = 225; - Big5PFreq[38][41] = 224; - Big5PFreq[37][127] = 223; - Big5PFreq[22][175] = 222; - Big5PFreq[44][30] = 221; - Big5PFreq[47][178] = 220; - Big5PFreq[43][99] = 219; - Big5PFreq[19][4] = 218; - Big5PFreq[37][97] = 217; - Big5PFreq[38][181] = 216; - Big5PFreq[45][103] = 215; - Big5PFreq[1][86] = 214; - Big5PFreq[40][15] = 213; - Big5PFreq[22][136] = 212; - Big5PFreq[75][165] = 211; - Big5PFreq[36][15] = 210; - Big5PFreq[46][80] = 209; - Big5PFreq[59][55] = 208; - Big5PFreq[37][108] = 207; - Big5PFreq[21][109] = 206; - Big5PFreq[24][165] = 205; - Big5PFreq[79][158] = 204; - Big5PFreq[44][139] = 203; - Big5PFreq[36][124] = 202; - Big5PFreq[42][185] = 201; - Big5PFreq[39][186] = 200; - Big5PFreq[22][128] = 199; - Big5PFreq[40][44] = 198; - Big5PFreq[41][105] = 197; - Big5PFreq[1][70] = 196; - Big5PFreq[1][68] = 195; - Big5PFreq[53][22] = 194; - Big5PFreq[36][54] = 193; - Big5PFreq[47][147] = 192; - Big5PFreq[35][36] = 191; - Big5PFreq[35][185] = 190; - Big5PFreq[45][37] = 189; - Big5PFreq[43][163] = 188; - Big5PFreq[56][115] = 187; - Big5PFreq[38][164] = 186; - Big5PFreq[35][141] = 185; - Big5PFreq[42][132] = 184; - Big5PFreq[46][120] = 183; - Big5PFreq[69][142] = 182; - Big5PFreq[38][175] = 181; - Big5PFreq[22][112] = 180; - Big5PFreq[38][142] = 179; - Big5PFreq[40][37] = 178; - Big5PFreq[37][109] = 177; - Big5PFreq[40][144] = 176; - Big5PFreq[44][117] = 175; - Big5PFreq[35][181] = 174; - Big5PFreq[26][105] = 173; - Big5PFreq[16][48] = 172; - Big5PFreq[44][122] = 171; - Big5PFreq[12][86] = 170; - Big5PFreq[84][53] = 169; - Big5PFreq[17][44] = 168; - Big5PFreq[59][54] = 167; - Big5PFreq[36][98] = 166; - Big5PFreq[45][115] = 165; - Big5PFreq[73][9] = 164; - Big5PFreq[44][123] = 163; - Big5PFreq[37][188] = 162; - 
Big5PFreq[51][117] = 161; - Big5PFreq[15][156] = 160; - Big5PFreq[36][155] = 159; - Big5PFreq[44][25] = 158; - Big5PFreq[38][12] = 157; - Big5PFreq[38][140] = 156; - Big5PFreq[23][4] = 155; - Big5PFreq[45][149] = 154; - Big5PFreq[22][189] = 153; - Big5PFreq[38][147] = 152; - Big5PFreq[27][5] = 151; - Big5PFreq[22][42] = 150; - Big5PFreq[3][68] = 149; - Big5PFreq[39][51] = 148; - Big5PFreq[36][29] = 147; - Big5PFreq[20][108] = 146; - Big5PFreq[50][57] = 145; - Big5PFreq[55][104] = 144; - Big5PFreq[22][46] = 143; - Big5PFreq[18][164] = 142; - Big5PFreq[50][159] = 141; - Big5PFreq[85][131] = 140; - Big5PFreq[26][79] = 139; - Big5PFreq[38][100] = 138; - Big5PFreq[53][112] = 137; - Big5PFreq[20][190] = 136; - Big5PFreq[14][69] = 135; - Big5PFreq[23][11] = 134; - Big5PFreq[40][114] = 133; - Big5PFreq[40][148] = 132; - Big5PFreq[53][130] = 131; - Big5PFreq[36][2] = 130; - Big5PFreq[66][82] = 129; - Big5PFreq[45][166] = 128; - Big5PFreq[4][88] = 127; - Big5PFreq[16][57] = 126; - Big5PFreq[22][116] = 125; - Big5PFreq[36][108] = 124; - Big5PFreq[13][48] = 123; - Big5PFreq[54][12] = 122; - Big5PFreq[40][136] = 121; - Big5PFreq[36][128] = 120; - Big5PFreq[23][6] = 119; - Big5PFreq[38][125] = 118; - Big5PFreq[45][154] = 117; - Big5PFreq[51][127] = 116; - Big5PFreq[44][163] = 115; - Big5PFreq[16][173] = 114; - Big5PFreq[43][49] = 113; - Big5PFreq[20][112] = 112; - Big5PFreq[15][168] = 111; - Big5PFreq[35][129] = 110; - Big5PFreq[20][45] = 109; - Big5PFreq[38][10] = 108; - Big5PFreq[57][171] = 107; - Big5PFreq[44][190] = 106; - Big5PFreq[40][56] = 105; - Big5PFreq[36][156] = 104; - Big5PFreq[3][88] = 103; - Big5PFreq[50][122] = 102; - Big5PFreq[36][7] = 101; - Big5PFreq[39][43] = 100; - Big5PFreq[15][166] = 99; - Big5PFreq[42][136] = 98; - Big5PFreq[22][131] = 97; - Big5PFreq[44][23] = 96; - Big5PFreq[54][147] = 95; - Big5PFreq[41][32] = 94; - Big5PFreq[23][121] = 93; - Big5PFreq[39][108] = 92; - Big5PFreq[2][78] = 91; - Big5PFreq[40][155] = 90; - Big5PFreq[55][51] = 89; - 
Big5PFreq[19][34] = 88; - Big5PFreq[48][128] = 87; - Big5PFreq[48][159] = 86; - Big5PFreq[20][70] = 85; - Big5PFreq[34][71] = 84; - Big5PFreq[16][31] = 83; - Big5PFreq[42][157] = 82; - Big5PFreq[20][44] = 81; - Big5PFreq[11][92] = 80; - Big5PFreq[44][180] = 79; - Big5PFreq[84][33] = 78; - Big5PFreq[16][116] = 77; - Big5PFreq[61][163] = 76; - Big5PFreq[35][164] = 75; - Big5PFreq[36][42] = 74; - Big5PFreq[13][40] = 73; - Big5PFreq[43][176] = 72; - Big5PFreq[2][66] = 71; - Big5PFreq[20][133] = 70; - Big5PFreq[36][65] = 69; - Big5PFreq[38][33] = 68; - Big5PFreq[12][91] = 67; - Big5PFreq[36][26] = 66; - Big5PFreq[15][174] = 65; - Big5PFreq[77][32] = 64; - Big5PFreq[16][1] = 63; - Big5PFreq[25][86] = 62; - Big5PFreq[17][13] = 61; - Big5PFreq[5][75] = 60; - Big5PFreq[36][52] = 59; - Big5PFreq[51][164] = 58; - Big5PFreq[12][85] = 57; - Big5PFreq[39][168] = 56; - Big5PFreq[43][16] = 55; - Big5PFreq[40][69] = 54; - Big5PFreq[26][108] = 53; - Big5PFreq[51][56] = 52; - Big5PFreq[16][37] = 51; - Big5PFreq[40][29] = 50; - Big5PFreq[46][171] = 49; - Big5PFreq[40][128] = 48; - Big5PFreq[72][114] = 47; - Big5PFreq[21][103] = 46; - Big5PFreq[22][44] = 45; - Big5PFreq[40][115] = 44; - Big5PFreq[43][7] = 43; - Big5PFreq[43][153] = 42; - Big5PFreq[17][20] = 41; - Big5PFreq[16][49] = 40; - Big5PFreq[36][57] = 39; - Big5PFreq[18][38] = 38; - Big5PFreq[45][184] = 37; - Big5PFreq[37][167] = 36; - Big5PFreq[26][106] = 35; - Big5PFreq[61][121] = 34; - Big5PFreq[89][140] = 33; - Big5PFreq[46][61] = 32; - Big5PFreq[39][163] = 31; - Big5PFreq[40][62] = 30; - Big5PFreq[38][165] = 29; - Big5PFreq[47][37] = 28; - Big5PFreq[18][155] = 27; - Big5PFreq[20][33] = 26; - Big5PFreq[29][90] = 25; - Big5PFreq[20][103] = 24; - Big5PFreq[37][51] = 23; - Big5PFreq[57][0] = 22; - Big5PFreq[40][31] = 21; - Big5PFreq[45][32] = 20; - Big5PFreq[59][23] = 19; - Big5PFreq[18][47] = 18; - Big5PFreq[45][134] = 17; - Big5PFreq[37][59] = 16; - Big5PFreq[21][128] = 15; - Big5PFreq[36][106] = 14; - Big5PFreq[31][39] = 13; 
- Big5PFreq[40][182] = 12; - Big5PFreq[52][155] = 11; - Big5PFreq[42][166] = 10; - Big5PFreq[35][27] = 9; - Big5PFreq[38][3] = 8; - Big5PFreq[13][44] = 7; - Big5PFreq[58][157] = 6; - Big5PFreq[47][51] = 5; - Big5PFreq[41][37] = 4; - Big5PFreq[41][172] = 3; - Big5PFreq[51][165] = 2; - Big5PFreq[15][161] = 1; - Big5PFreq[24][181] = 0; - EUC_TWFreq[48][49] = 599; - EUC_TWFreq[35][65] = 598; - EUC_TWFreq[41][27] = 597; - EUC_TWFreq[35][0] = 596; - EUC_TWFreq[39][19] = 595; - EUC_TWFreq[35][42] = 594; - EUC_TWFreq[38][66] = 593; - EUC_TWFreq[35][8] = 592; - EUC_TWFreq[35][6] = 591; - EUC_TWFreq[35][66] = 590; - EUC_TWFreq[43][14] = 589; - EUC_TWFreq[69][80] = 588; - EUC_TWFreq[50][48] = 587; - EUC_TWFreq[36][71] = 586; - EUC_TWFreq[37][10] = 585; - EUC_TWFreq[60][52] = 584; - EUC_TWFreq[51][21] = 583; - EUC_TWFreq[40][2] = 582; - EUC_TWFreq[67][35] = 581; - EUC_TWFreq[38][78] = 580; - EUC_TWFreq[49][18] = 579; - EUC_TWFreq[35][23] = 578; - EUC_TWFreq[42][83] = 577; - EUC_TWFreq[79][47] = 576; - EUC_TWFreq[61][82] = 575; - EUC_TWFreq[38][7] = 574; - EUC_TWFreq[35][29] = 573; - EUC_TWFreq[37][77] = 572; - EUC_TWFreq[54][67] = 571; - EUC_TWFreq[38][80] = 570; - EUC_TWFreq[52][74] = 569; - EUC_TWFreq[36][37] = 568; - EUC_TWFreq[74][8] = 567; - EUC_TWFreq[41][83] = 566; - EUC_TWFreq[36][75] = 565; - EUC_TWFreq[49][63] = 564; - EUC_TWFreq[42][58] = 563; - EUC_TWFreq[56][33] = 562; - EUC_TWFreq[37][76] = 561; - EUC_TWFreq[62][39] = 560; - EUC_TWFreq[35][21] = 559; - EUC_TWFreq[70][19] = 558; - EUC_TWFreq[77][88] = 557; - EUC_TWFreq[51][14] = 556; - EUC_TWFreq[36][17] = 555; - EUC_TWFreq[44][51] = 554; - EUC_TWFreq[38][72] = 553; - EUC_TWFreq[74][90] = 552; - EUC_TWFreq[35][48] = 551; - EUC_TWFreq[35][69] = 550; - EUC_TWFreq[66][86] = 549; - EUC_TWFreq[57][20] = 548; - EUC_TWFreq[35][53] = 547; - EUC_TWFreq[36][87] = 546; - EUC_TWFreq[84][67] = 545; - EUC_TWFreq[70][56] = 544; - EUC_TWFreq[71][54] = 543; - EUC_TWFreq[60][70] = 542; - EUC_TWFreq[80][1] = 541; - 
EUC_TWFreq[39][59] = 540; - EUC_TWFreq[39][51] = 539; - EUC_TWFreq[35][44] = 538; - EUC_TWFreq[48][4] = 537; - EUC_TWFreq[55][24] = 536; - EUC_TWFreq[52][4] = 535; - EUC_TWFreq[54][26] = 534; - EUC_TWFreq[36][31] = 533; - EUC_TWFreq[37][22] = 532; - EUC_TWFreq[37][9] = 531; - EUC_TWFreq[46][0] = 530; - EUC_TWFreq[56][46] = 529; - EUC_TWFreq[47][93] = 528; - EUC_TWFreq[37][25] = 527; - EUC_TWFreq[39][8] = 526; - EUC_TWFreq[46][73] = 525; - EUC_TWFreq[38][48] = 524; - EUC_TWFreq[39][83] = 523; - EUC_TWFreq[60][92] = 522; - EUC_TWFreq[70][11] = 521; - EUC_TWFreq[63][84] = 520; - EUC_TWFreq[38][65] = 519; - EUC_TWFreq[45][45] = 518; - EUC_TWFreq[63][49] = 517; - EUC_TWFreq[63][50] = 516; - EUC_TWFreq[39][93] = 515; - EUC_TWFreq[68][20] = 514; - EUC_TWFreq[44][84] = 513; - EUC_TWFreq[66][34] = 512; - EUC_TWFreq[37][58] = 511; - EUC_TWFreq[39][0] = 510; - EUC_TWFreq[59][1] = 509; - EUC_TWFreq[47][8] = 508; - EUC_TWFreq[61][17] = 507; - EUC_TWFreq[53][87] = 506; - EUC_TWFreq[67][26] = 505; - EUC_TWFreq[43][46] = 504; - EUC_TWFreq[38][61] = 503; - EUC_TWFreq[45][9] = 502; - EUC_TWFreq[66][83] = 501; - EUC_TWFreq[43][88] = 500; - EUC_TWFreq[85][20] = 499; - EUC_TWFreq[57][36] = 498; - EUC_TWFreq[43][6] = 497; - EUC_TWFreq[86][77] = 496; - EUC_TWFreq[42][70] = 495; - EUC_TWFreq[49][78] = 494; - EUC_TWFreq[36][40] = 493; - EUC_TWFreq[42][71] = 492; - EUC_TWFreq[58][49] = 491; - EUC_TWFreq[35][20] = 490; - EUC_TWFreq[76][20] = 489; - EUC_TWFreq[39][25] = 488; - EUC_TWFreq[40][34] = 487; - EUC_TWFreq[39][76] = 486; - EUC_TWFreq[40][1] = 485; - EUC_TWFreq[59][0] = 484; - EUC_TWFreq[39][70] = 483; - EUC_TWFreq[46][14] = 482; - EUC_TWFreq[68][77] = 481; - EUC_TWFreq[38][55] = 480; - EUC_TWFreq[35][78] = 479; - EUC_TWFreq[84][44] = 478; - EUC_TWFreq[36][41] = 477; - EUC_TWFreq[37][62] = 476; - EUC_TWFreq[65][67] = 475; - EUC_TWFreq[69][66] = 474; - EUC_TWFreq[73][55] = 473; - EUC_TWFreq[71][49] = 472; - EUC_TWFreq[66][87] = 471; - EUC_TWFreq[38][33] = 470; - EUC_TWFreq[64][61] = 
469; - EUC_TWFreq[35][7] = 468; - EUC_TWFreq[47][49] = 467; - EUC_TWFreq[56][14] = 466; - EUC_TWFreq[36][49] = 465; - EUC_TWFreq[50][81] = 464; - EUC_TWFreq[55][76] = 463; - EUC_TWFreq[35][19] = 462; - EUC_TWFreq[44][47] = 461; - EUC_TWFreq[35][15] = 460; - EUC_TWFreq[82][59] = 459; - EUC_TWFreq[35][43] = 458; - EUC_TWFreq[73][0] = 457; - EUC_TWFreq[57][83] = 456; - EUC_TWFreq[42][46] = 455; - EUC_TWFreq[36][0] = 454; - EUC_TWFreq[70][88] = 453; - EUC_TWFreq[42][22] = 452; - EUC_TWFreq[46][58] = 451; - EUC_TWFreq[36][34] = 450; - EUC_TWFreq[39][24] = 449; - EUC_TWFreq[35][55] = 448; - EUC_TWFreq[44][91] = 447; - EUC_TWFreq[37][51] = 446; - EUC_TWFreq[36][19] = 445; - EUC_TWFreq[69][90] = 444; - EUC_TWFreq[55][35] = 443; - EUC_TWFreq[35][54] = 442; - EUC_TWFreq[49][61] = 441; - EUC_TWFreq[36][67] = 440; - EUC_TWFreq[88][34] = 439; - EUC_TWFreq[35][17] = 438; - EUC_TWFreq[65][69] = 437; - EUC_TWFreq[74][89] = 436; - EUC_TWFreq[37][31] = 435; - EUC_TWFreq[43][48] = 434; - EUC_TWFreq[89][27] = 433; - EUC_TWFreq[42][79] = 432; - EUC_TWFreq[69][57] = 431; - EUC_TWFreq[36][13] = 430; - EUC_TWFreq[35][62] = 429; - EUC_TWFreq[65][47] = 428; - EUC_TWFreq[56][8] = 427; - EUC_TWFreq[38][79] = 426; - EUC_TWFreq[37][64] = 425; - EUC_TWFreq[64][64] = 424; - EUC_TWFreq[38][53] = 423; - EUC_TWFreq[38][31] = 422; - EUC_TWFreq[56][81] = 421; - EUC_TWFreq[36][22] = 420; - EUC_TWFreq[43][4] = 419; - EUC_TWFreq[36][90] = 418; - EUC_TWFreq[38][62] = 417; - EUC_TWFreq[66][85] = 416; - EUC_TWFreq[39][1] = 415; - EUC_TWFreq[59][40] = 414; - EUC_TWFreq[58][93] = 413; - EUC_TWFreq[44][43] = 412; - EUC_TWFreq[39][49] = 411; - EUC_TWFreq[64][2] = 410; - EUC_TWFreq[41][35] = 409; - EUC_TWFreq[60][22] = 408; - EUC_TWFreq[35][91] = 407; - EUC_TWFreq[78][1] = 406; - EUC_TWFreq[36][14] = 405; - EUC_TWFreq[82][29] = 404; - EUC_TWFreq[52][86] = 403; - EUC_TWFreq[40][16] = 402; - EUC_TWFreq[91][52] = 401; - EUC_TWFreq[50][75] = 400; - EUC_TWFreq[64][30] = 399; - EUC_TWFreq[90][78] = 398; - 
EUC_TWFreq[36][52] = 397; - EUC_TWFreq[55][87] = 396; - EUC_TWFreq[57][5] = 395; - EUC_TWFreq[57][31] = 394; - EUC_TWFreq[42][35] = 393; - EUC_TWFreq[69][50] = 392; - EUC_TWFreq[45][8] = 391; - EUC_TWFreq[50][87] = 390; - EUC_TWFreq[69][55] = 389; - EUC_TWFreq[92][3] = 388; - EUC_TWFreq[36][43] = 387; - EUC_TWFreq[64][10] = 386; - EUC_TWFreq[56][25] = 385; - EUC_TWFreq[60][68] = 384; - EUC_TWFreq[51][46] = 383; - EUC_TWFreq[50][0] = 382; - EUC_TWFreq[38][30] = 381; - EUC_TWFreq[50][85] = 380; - EUC_TWFreq[60][54] = 379; - EUC_TWFreq[73][6] = 378; - EUC_TWFreq[73][28] = 377; - EUC_TWFreq[56][19] = 376; - EUC_TWFreq[62][69] = 375; - EUC_TWFreq[81][66] = 374; - EUC_TWFreq[40][32] = 373; - EUC_TWFreq[76][31] = 372; - EUC_TWFreq[35][10] = 371; - EUC_TWFreq[41][37] = 370; - EUC_TWFreq[52][82] = 369; - EUC_TWFreq[91][72] = 368; - EUC_TWFreq[37][29] = 367; - EUC_TWFreq[56][30] = 366; - EUC_TWFreq[37][80] = 365; - EUC_TWFreq[81][56] = 364; - EUC_TWFreq[70][3] = 363; - EUC_TWFreq[76][15] = 362; - EUC_TWFreq[46][47] = 361; - EUC_TWFreq[35][88] = 360; - EUC_TWFreq[61][58] = 359; - EUC_TWFreq[37][37] = 358; - EUC_TWFreq[57][22] = 357; - EUC_TWFreq[41][23] = 356; - EUC_TWFreq[90][66] = 355; - EUC_TWFreq[39][60] = 354; - EUC_TWFreq[38][0] = 353; - EUC_TWFreq[37][87] = 352; - EUC_TWFreq[46][2] = 351; - EUC_TWFreq[38][56] = 350; - EUC_TWFreq[58][11] = 349; - EUC_TWFreq[48][10] = 348; - EUC_TWFreq[74][4] = 347; - EUC_TWFreq[40][42] = 346; - EUC_TWFreq[41][52] = 345; - EUC_TWFreq[61][92] = 344; - EUC_TWFreq[39][50] = 343; - EUC_TWFreq[47][88] = 342; - EUC_TWFreq[88][36] = 341; - EUC_TWFreq[45][73] = 340; - EUC_TWFreq[82][3] = 339; - EUC_TWFreq[61][36] = 338; - EUC_TWFreq[60][33] = 337; - EUC_TWFreq[38][27] = 336; - EUC_TWFreq[35][83] = 335; - EUC_TWFreq[65][24] = 334; - EUC_TWFreq[73][10] = 333; - EUC_TWFreq[41][13] = 332; - EUC_TWFreq[50][27] = 331; - EUC_TWFreq[59][50] = 330; - EUC_TWFreq[42][45] = 329; - EUC_TWFreq[55][19] = 328; - EUC_TWFreq[36][77] = 327; - EUC_TWFreq[69][31] = 
326; - EUC_TWFreq[60][7] = 325; - EUC_TWFreq[40][88] = 324; - EUC_TWFreq[57][56] = 323; - EUC_TWFreq[50][50] = 322; - EUC_TWFreq[42][37] = 321; - EUC_TWFreq[38][82] = 320; - EUC_TWFreq[52][25] = 319; - EUC_TWFreq[42][67] = 318; - EUC_TWFreq[48][40] = 317; - EUC_TWFreq[45][81] = 316; - EUC_TWFreq[57][14] = 315; - EUC_TWFreq[42][13] = 314; - EUC_TWFreq[78][0] = 313; - EUC_TWFreq[35][51] = 312; - EUC_TWFreq[41][67] = 311; - EUC_TWFreq[64][23] = 310; - EUC_TWFreq[36][65] = 309; - EUC_TWFreq[48][50] = 308; - EUC_TWFreq[46][69] = 307; - EUC_TWFreq[47][89] = 306; - EUC_TWFreq[41][48] = 305; - EUC_TWFreq[60][56] = 304; - EUC_TWFreq[44][82] = 303; - EUC_TWFreq[47][35] = 302; - EUC_TWFreq[49][3] = 301; - EUC_TWFreq[49][69] = 300; - EUC_TWFreq[45][93] = 299; - EUC_TWFreq[60][34] = 298; - EUC_TWFreq[60][82] = 297; - EUC_TWFreq[61][61] = 296; - EUC_TWFreq[86][42] = 295; - EUC_TWFreq[89][60] = 294; - EUC_TWFreq[48][31] = 293; - EUC_TWFreq[35][75] = 292; - EUC_TWFreq[91][39] = 291; - EUC_TWFreq[53][19] = 290; - EUC_TWFreq[39][72] = 289; - EUC_TWFreq[69][59] = 288; - EUC_TWFreq[41][7] = 287; - EUC_TWFreq[54][13] = 286; - EUC_TWFreq[43][28] = 285; - EUC_TWFreq[36][6] = 284; - EUC_TWFreq[45][75] = 283; - EUC_TWFreq[36][61] = 282; - EUC_TWFreq[38][21] = 281; - EUC_TWFreq[45][14] = 280; - EUC_TWFreq[61][43] = 279; - EUC_TWFreq[36][63] = 278; - EUC_TWFreq[43][30] = 277; - EUC_TWFreq[46][51] = 276; - EUC_TWFreq[68][87] = 275; - EUC_TWFreq[39][26] = 274; - EUC_TWFreq[46][76] = 273; - EUC_TWFreq[36][15] = 272; - EUC_TWFreq[35][40] = 271; - EUC_TWFreq[79][60] = 270; - EUC_TWFreq[46][7] = 269; - EUC_TWFreq[65][72] = 268; - EUC_TWFreq[69][88] = 267; - EUC_TWFreq[47][18] = 266; - EUC_TWFreq[37][0] = 265; - EUC_TWFreq[37][49] = 264; - EUC_TWFreq[67][37] = 263; - EUC_TWFreq[36][91] = 262; - EUC_TWFreq[75][48] = 261; - EUC_TWFreq[75][63] = 260; - EUC_TWFreq[83][87] = 259; - EUC_TWFreq[37][44] = 258; - EUC_TWFreq[73][54] = 257; - EUC_TWFreq[51][61] = 256; - EUC_TWFreq[46][57] = 255; - 
EUC_TWFreq[55][21] = 254; - EUC_TWFreq[39][66] = 253; - EUC_TWFreq[47][11] = 252; - EUC_TWFreq[52][8] = 251; - EUC_TWFreq[82][81] = 250; - EUC_TWFreq[36][57] = 249; - EUC_TWFreq[38][54] = 248; - EUC_TWFreq[43][81] = 247; - EUC_TWFreq[37][42] = 246; - EUC_TWFreq[40][18] = 245; - EUC_TWFreq[80][90] = 244; - EUC_TWFreq[37][84] = 243; - EUC_TWFreq[57][15] = 242; - EUC_TWFreq[38][87] = 241; - EUC_TWFreq[37][32] = 240; - EUC_TWFreq[53][53] = 239; - EUC_TWFreq[89][29] = 238; - EUC_TWFreq[81][53] = 237; - EUC_TWFreq[75][3] = 236; - EUC_TWFreq[83][73] = 235; - EUC_TWFreq[66][13] = 234; - EUC_TWFreq[48][7] = 233; - EUC_TWFreq[46][35] = 232; - EUC_TWFreq[35][86] = 231; - EUC_TWFreq[37][20] = 230; - EUC_TWFreq[46][80] = 229; - EUC_TWFreq[38][24] = 228; - EUC_TWFreq[41][68] = 227; - EUC_TWFreq[42][21] = 226; - EUC_TWFreq[43][32] = 225; - EUC_TWFreq[38][20] = 224; - EUC_TWFreq[37][59] = 223; - EUC_TWFreq[41][77] = 222; - EUC_TWFreq[59][57] = 221; - EUC_TWFreq[68][59] = 220; - EUC_TWFreq[39][43] = 219; - EUC_TWFreq[54][39] = 218; - EUC_TWFreq[48][28] = 217; - EUC_TWFreq[54][28] = 216; - EUC_TWFreq[41][44] = 215; - EUC_TWFreq[51][64] = 214; - EUC_TWFreq[47][72] = 213; - EUC_TWFreq[62][67] = 212; - EUC_TWFreq[42][43] = 211; - EUC_TWFreq[61][38] = 210; - EUC_TWFreq[76][25] = 209; - EUC_TWFreq[48][91] = 208; - EUC_TWFreq[36][36] = 207; - EUC_TWFreq[80][32] = 206; - EUC_TWFreq[81][40] = 205; - EUC_TWFreq[37][5] = 204; - EUC_TWFreq[74][69] = 203; - EUC_TWFreq[36][82] = 202; - EUC_TWFreq[46][59] = 201; - - GBKFreq[52][132] = 600; - GBKFreq[73][135] = 599; - GBKFreq[49][123] = 598; - GBKFreq[77][146] = 597; - GBKFreq[81][123] = 596; - GBKFreq[82][144] = 595; - GBKFreq[51][179] = 594; - GBKFreq[83][154] = 593; - GBKFreq[71][139] = 592; - GBKFreq[64][139] = 591; - GBKFreq[85][144] = 590; - GBKFreq[52][125] = 589; - GBKFreq[88][25] = 588; - GBKFreq[81][106] = 587; - GBKFreq[81][148] = 586; - GBKFreq[62][137] = 585; - GBKFreq[94][0] = 584; - GBKFreq[1][64] = 583; - GBKFreq[67][163] = 582; - 
GBKFreq[20][190] = 581; - GBKFreq[57][131] = 580; - GBKFreq[29][169] = 579; - GBKFreq[72][143] = 578; - GBKFreq[0][173] = 577; - GBKFreq[11][23] = 576; - GBKFreq[61][141] = 575; - GBKFreq[60][123] = 574; - GBKFreq[81][114] = 573; - GBKFreq[82][131] = 572; - GBKFreq[67][156] = 571; - GBKFreq[71][167] = 570; - GBKFreq[20][50] = 569; - GBKFreq[77][132] = 568; - GBKFreq[84][38] = 567; - GBKFreq[26][29] = 566; - GBKFreq[74][187] = 565; - GBKFreq[62][116] = 564; - GBKFreq[67][135] = 563; - GBKFreq[5][86] = 562; - GBKFreq[72][186] = 561; - GBKFreq[75][161] = 560; - GBKFreq[78][130] = 559; - GBKFreq[94][30] = 558; - GBKFreq[84][72] = 557; - GBKFreq[1][67] = 556; - GBKFreq[75][172] = 555; - GBKFreq[74][185] = 554; - GBKFreq[53][160] = 553; - GBKFreq[123][14] = 552; - GBKFreq[79][97] = 551; - GBKFreq[85][110] = 550; - GBKFreq[78][171] = 549; - GBKFreq[52][131] = 548; - GBKFreq[56][100] = 547; - GBKFreq[50][182] = 546; - GBKFreq[94][64] = 545; - GBKFreq[106][74] = 544; - GBKFreq[11][102] = 543; - GBKFreq[53][124] = 542; - GBKFreq[24][3] = 541; - GBKFreq[86][148] = 540; - GBKFreq[53][184] = 539; - GBKFreq[86][147] = 538; - GBKFreq[96][161] = 537; - GBKFreq[82][77] = 536; - GBKFreq[59][146] = 535; - GBKFreq[84][126] = 534; - GBKFreq[79][132] = 533; - GBKFreq[85][123] = 532; - GBKFreq[71][101] = 531; - GBKFreq[85][106] = 530; - GBKFreq[6][184] = 529; - GBKFreq[57][156] = 528; - GBKFreq[75][104] = 527; - GBKFreq[50][137] = 526; - GBKFreq[79][133] = 525; - GBKFreq[76][108] = 524; - GBKFreq[57][142] = 523; - GBKFreq[84][130] = 522; - GBKFreq[52][128] = 521; - GBKFreq[47][44] = 520; - GBKFreq[52][152] = 519; - GBKFreq[54][104] = 518; - GBKFreq[30][47] = 517; - GBKFreq[71][123] = 516; - GBKFreq[52][107] = 515; - GBKFreq[45][84] = 514; - GBKFreq[107][118] = 513; - GBKFreq[5][161] = 512; - GBKFreq[48][126] = 511; - GBKFreq[67][170] = 510; - GBKFreq[43][6] = 509; - GBKFreq[70][112] = 508; - GBKFreq[86][174] = 507; - GBKFreq[84][166] = 506; - GBKFreq[79][130] = 505; - GBKFreq[57][141] = 
504; - GBKFreq[81][178] = 503; - GBKFreq[56][187] = 502; - GBKFreq[81][162] = 501; - GBKFreq[53][104] = 500; - GBKFreq[123][35] = 499; - GBKFreq[70][169] = 498; - GBKFreq[69][164] = 497; - GBKFreq[109][61] = 496; - GBKFreq[73][130] = 495; - GBKFreq[62][134] = 494; - GBKFreq[54][125] = 493; - GBKFreq[79][105] = 492; - GBKFreq[70][165] = 491; - GBKFreq[71][189] = 490; - GBKFreq[23][147] = 489; - GBKFreq[51][139] = 488; - GBKFreq[47][137] = 487; - GBKFreq[77][123] = 486; - GBKFreq[86][183] = 485; - GBKFreq[63][173] = 484; - GBKFreq[79][144] = 483; - GBKFreq[84][159] = 482; - GBKFreq[60][91] = 481; - GBKFreq[66][187] = 480; - GBKFreq[73][114] = 479; - GBKFreq[85][56] = 478; - GBKFreq[71][149] = 477; - GBKFreq[84][189] = 476; - GBKFreq[104][31] = 475; - GBKFreq[83][82] = 474; - GBKFreq[68][35] = 473; - GBKFreq[11][77] = 472; - GBKFreq[15][155] = 471; - GBKFreq[83][153] = 470; - GBKFreq[71][1] = 469; - GBKFreq[53][190] = 468; - GBKFreq[50][135] = 467; - GBKFreq[3][147] = 466; - GBKFreq[48][136] = 465; - GBKFreq[66][166] = 464; - GBKFreq[55][159] = 463; - GBKFreq[82][150] = 462; - GBKFreq[58][178] = 461; - GBKFreq[64][102] = 460; - GBKFreq[16][106] = 459; - GBKFreq[68][110] = 458; - GBKFreq[54][14] = 457; - GBKFreq[60][140] = 456; - GBKFreq[91][71] = 455; - GBKFreq[54][150] = 454; - GBKFreq[78][177] = 453; - GBKFreq[78][117] = 452; - GBKFreq[104][12] = 451; - GBKFreq[73][150] = 450; - GBKFreq[51][142] = 449; - GBKFreq[81][145] = 448; - GBKFreq[66][183] = 447; - GBKFreq[51][178] = 446; - GBKFreq[75][107] = 445; - GBKFreq[65][119] = 444; - GBKFreq[69][176] = 443; - GBKFreq[59][122] = 442; - GBKFreq[78][160] = 441; - GBKFreq[85][183] = 440; - GBKFreq[105][16] = 439; - GBKFreq[73][110] = 438; - GBKFreq[104][39] = 437; - GBKFreq[119][16] = 436; - GBKFreq[76][162] = 435; - GBKFreq[67][152] = 434; - GBKFreq[82][24] = 433; - GBKFreq[73][121] = 432; - GBKFreq[83][83] = 431; - GBKFreq[82][145] = 430; - GBKFreq[49][133] = 429; - GBKFreq[94][13] = 428; - GBKFreq[58][139] = 427; - 
GBKFreq[74][189] = 426; - GBKFreq[66][177] = 425; - GBKFreq[85][184] = 424; - GBKFreq[55][183] = 423; - GBKFreq[71][107] = 422; - GBKFreq[11][98] = 421; - GBKFreq[72][153] = 420; - GBKFreq[2][137] = 419; - GBKFreq[59][147] = 418; - GBKFreq[58][152] = 417; - GBKFreq[55][144] = 416; - GBKFreq[73][125] = 415; - GBKFreq[52][154] = 414; - GBKFreq[70][178] = 413; - GBKFreq[79][148] = 412; - GBKFreq[63][143] = 411; - GBKFreq[50][140] = 410; - GBKFreq[47][145] = 409; - GBKFreq[48][123] = 408; - GBKFreq[56][107] = 407; - GBKFreq[84][83] = 406; - GBKFreq[59][112] = 405; - GBKFreq[124][72] = 404; - GBKFreq[79][99] = 403; - GBKFreq[3][37] = 402; - GBKFreq[114][55] = 401; - GBKFreq[85][152] = 400; - GBKFreq[60][47] = 399; - GBKFreq[65][96] = 398; - GBKFreq[74][110] = 397; - GBKFreq[86][182] = 396; - GBKFreq[50][99] = 395; - GBKFreq[67][186] = 394; - GBKFreq[81][74] = 393; - GBKFreq[80][37] = 392; - GBKFreq[21][60] = 391; - GBKFreq[110][12] = 390; - GBKFreq[60][162] = 389; - GBKFreq[29][115] = 388; - GBKFreq[83][130] = 387; - GBKFreq[52][136] = 386; - GBKFreq[63][114] = 385; - GBKFreq[49][127] = 384; - GBKFreq[83][109] = 383; - GBKFreq[66][128] = 382; - GBKFreq[78][136] = 381; - GBKFreq[81][180] = 380; - GBKFreq[76][104] = 379; - GBKFreq[56][156] = 378; - GBKFreq[61][23] = 377; - GBKFreq[4][30] = 376; - GBKFreq[69][154] = 375; - GBKFreq[100][37] = 374; - GBKFreq[54][177] = 373; - GBKFreq[23][119] = 372; - GBKFreq[71][171] = 371; - GBKFreq[84][146] = 370; - GBKFreq[20][184] = 369; - GBKFreq[86][76] = 368; - GBKFreq[74][132] = 367; - GBKFreq[47][97] = 366; - GBKFreq[82][137] = 365; - GBKFreq[94][56] = 364; - GBKFreq[92][30] = 363; - GBKFreq[19][117] = 362; - GBKFreq[48][173] = 361; - GBKFreq[2][136] = 360; - GBKFreq[7][182] = 359; - GBKFreq[74][188] = 358; - GBKFreq[14][132] = 357; - GBKFreq[62][172] = 356; - GBKFreq[25][39] = 355; - GBKFreq[85][129] = 354; - GBKFreq[64][98] = 353; - GBKFreq[67][127] = 352; - GBKFreq[72][167] = 351; - GBKFreq[57][143] = 350; - GBKFreq[76][187] = 
349; - GBKFreq[83][181] = 348; - GBKFreq[84][10] = 347; - GBKFreq[55][166] = 346; - GBKFreq[55][188] = 345; - GBKFreq[13][151] = 344; - GBKFreq[62][124] = 343; - GBKFreq[53][136] = 342; - GBKFreq[106][57] = 341; - GBKFreq[47][166] = 340; - GBKFreq[109][30] = 339; - GBKFreq[78][114] = 338; - GBKFreq[83][19] = 337; - GBKFreq[56][162] = 336; - GBKFreq[60][177] = 335; - GBKFreq[88][9] = 334; - GBKFreq[74][163] = 333; - GBKFreq[52][156] = 332; - GBKFreq[71][180] = 331; - GBKFreq[60][57] = 330; - GBKFreq[72][173] = 329; - GBKFreq[82][91] = 328; - GBKFreq[51][186] = 327; - GBKFreq[75][86] = 326; - GBKFreq[75][78] = 325; - GBKFreq[76][170] = 324; - GBKFreq[60][147] = 323; - GBKFreq[82][75] = 322; - GBKFreq[80][148] = 321; - GBKFreq[86][150] = 320; - GBKFreq[13][95] = 319; - GBKFreq[0][11] = 318; - GBKFreq[84][190] = 317; - GBKFreq[76][166] = 316; - GBKFreq[14][72] = 315; - GBKFreq[67][144] = 314; - GBKFreq[84][44] = 313; - GBKFreq[72][125] = 312; - GBKFreq[66][127] = 311; - GBKFreq[60][25] = 310; - GBKFreq[70][146] = 309; - GBKFreq[79][135] = 308; - GBKFreq[54][135] = 307; - GBKFreq[60][104] = 306; - GBKFreq[55][132] = 305; - GBKFreq[94][2] = 304; - GBKFreq[54][133] = 303; - GBKFreq[56][190] = 302; - GBKFreq[58][174] = 301; - GBKFreq[80][144] = 300; - GBKFreq[85][113] = 299; - - KRFreq[31][43] = 600; - KRFreq[19][56] = 599; - KRFreq[38][46] = 598; - KRFreq[3][3] = 597; - KRFreq[29][77] = 596; - KRFreq[19][33] = 595; - KRFreq[30][0] = 594; - KRFreq[29][89] = 593; - KRFreq[31][26] = 592; - KRFreq[31][38] = 591; - KRFreq[32][85] = 590; - KRFreq[15][0] = 589; - KRFreq[16][54] = 588; - KRFreq[15][76] = 587; - KRFreq[31][25] = 586; - KRFreq[23][13] = 585; - KRFreq[28][34] = 584; - KRFreq[18][9] = 583; - KRFreq[29][37] = 582; - KRFreq[22][45] = 581; - KRFreq[19][46] = 580; - KRFreq[16][65] = 579; - KRFreq[23][5] = 578; - KRFreq[26][70] = 577; - KRFreq[31][53] = 576; - KRFreq[27][12] = 575; - KRFreq[30][67] = 574; - KRFreq[31][57] = 573; - KRFreq[20][20] = 572; - KRFreq[30][31] = 
571; - KRFreq[20][72] = 570; - KRFreq[15][51] = 569; - KRFreq[3][8] = 568; - KRFreq[32][53] = 567; - KRFreq[27][85] = 566; - KRFreq[25][23] = 565; - KRFreq[15][44] = 564; - KRFreq[32][3] = 563; - KRFreq[31][68] = 562; - KRFreq[30][24] = 561; - KRFreq[29][49] = 560; - KRFreq[27][49] = 559; - KRFreq[23][23] = 558; - KRFreq[31][91] = 557; - KRFreq[31][46] = 556; - KRFreq[19][74] = 555; - KRFreq[27][27] = 554; - KRFreq[3][17] = 553; - KRFreq[20][38] = 552; - KRFreq[21][82] = 551; - KRFreq[28][25] = 550; - KRFreq[32][5] = 549; - KRFreq[31][23] = 548; - KRFreq[25][45] = 547; - KRFreq[32][87] = 546; - KRFreq[18][26] = 545; - KRFreq[24][10] = 544; - KRFreq[26][82] = 543; - KRFreq[15][89] = 542; - KRFreq[28][36] = 541; - KRFreq[28][31] = 540; - KRFreq[16][23] = 539; - KRFreq[16][77] = 538; - KRFreq[19][84] = 537; - KRFreq[23][72] = 536; - KRFreq[38][48] = 535; - KRFreq[23][2] = 534; - KRFreq[30][20] = 533; - KRFreq[38][47] = 532; - KRFreq[39][12] = 531; - KRFreq[23][21] = 530; - KRFreq[18][17] = 529; - KRFreq[30][87] = 528; - KRFreq[29][62] = 527; - KRFreq[29][87] = 526; - KRFreq[34][53] = 525; - KRFreq[32][29] = 524; - KRFreq[35][0] = 523; - KRFreq[24][43] = 522; - KRFreq[36][44] = 521; - KRFreq[20][30] = 520; - KRFreq[39][86] = 519; - KRFreq[22][14] = 518; - KRFreq[29][39] = 517; - KRFreq[28][38] = 516; - KRFreq[23][79] = 515; - KRFreq[24][56] = 514; - KRFreq[29][63] = 513; - KRFreq[31][45] = 512; - KRFreq[23][26] = 511; - KRFreq[15][87] = 510; - KRFreq[30][74] = 509; - KRFreq[24][69] = 508; - KRFreq[20][4] = 507; - KRFreq[27][50] = 506; - KRFreq[30][75] = 505; - KRFreq[24][13] = 504; - KRFreq[30][8] = 503; - KRFreq[31][6] = 502; - KRFreq[25][80] = 501; - KRFreq[36][8] = 500; - KRFreq[15][18] = 499; - KRFreq[39][23] = 498; - KRFreq[16][24] = 497; - KRFreq[31][89] = 496; - KRFreq[15][71] = 495; - KRFreq[15][57] = 494; - KRFreq[30][11] = 493; - KRFreq[15][36] = 492; - KRFreq[16][60] = 491; - KRFreq[24][45] = 490; - KRFreq[37][35] = 489; - KRFreq[24][87] = 488; - 
KRFreq[20][45] = 487; - KRFreq[31][90] = 486; - KRFreq[32][21] = 485; - KRFreq[19][70] = 484; - KRFreq[24][15] = 483; - KRFreq[26][92] = 482; - KRFreq[37][13] = 481; - KRFreq[39][2] = 480; - KRFreq[23][70] = 479; - KRFreq[27][25] = 478; - KRFreq[15][69] = 477; - KRFreq[19][61] = 476; - KRFreq[31][58] = 475; - KRFreq[24][57] = 474; - KRFreq[36][74] = 473; - KRFreq[21][6] = 472; - KRFreq[30][44] = 471; - KRFreq[15][91] = 470; - KRFreq[27][16] = 469; - KRFreq[29][42] = 468; - KRFreq[33][86] = 467; - KRFreq[29][41] = 466; - KRFreq[20][68] = 465; - KRFreq[25][47] = 464; - KRFreq[22][0] = 463; - KRFreq[18][14] = 462; - KRFreq[31][28] = 461; - KRFreq[15][2] = 460; - KRFreq[23][76] = 459; - KRFreq[38][32] = 458; - KRFreq[29][82] = 457; - KRFreq[21][86] = 456; - KRFreq[24][62] = 455; - KRFreq[31][64] = 454; - KRFreq[38][26] = 453; - KRFreq[32][86] = 452; - KRFreq[22][32] = 451; - KRFreq[19][59] = 450; - KRFreq[34][18] = 449; - KRFreq[18][54] = 448; - KRFreq[38][63] = 447; - KRFreq[36][23] = 446; - KRFreq[35][35] = 445; - KRFreq[32][62] = 444; - KRFreq[28][35] = 443; - KRFreq[27][13] = 442; - KRFreq[31][59] = 441; - KRFreq[29][29] = 440; - KRFreq[15][64] = 439; - KRFreq[26][84] = 438; - KRFreq[21][90] = 437; - KRFreq[20][24] = 436; - KRFreq[16][18] = 435; - KRFreq[22][23] = 434; - KRFreq[31][14] = 433; - KRFreq[15][1] = 432; - KRFreq[18][63] = 431; - KRFreq[19][10] = 430; - KRFreq[25][49] = 429; - KRFreq[36][57] = 428; - KRFreq[20][22] = 427; - KRFreq[15][15] = 426; - KRFreq[31][51] = 425; - KRFreq[24][60] = 424; - KRFreq[31][70] = 423; - KRFreq[15][7] = 422; - KRFreq[28][40] = 421; - KRFreq[18][41] = 420; - KRFreq[15][38] = 419; - KRFreq[32][0] = 418; - KRFreq[19][51] = 417; - KRFreq[34][62] = 416; - KRFreq[16][27] = 415; - KRFreq[20][70] = 414; - KRFreq[22][33] = 413; - KRFreq[26][73] = 412; - KRFreq[20][79] = 411; - KRFreq[23][6] = 410; - KRFreq[24][85] = 409; - KRFreq[38][51] = 408; - KRFreq[29][88] = 407; - KRFreq[38][55] = 406; - KRFreq[32][32] = 405; - KRFreq[27][18] 
= 404; - KRFreq[23][87] = 403; - KRFreq[35][6] = 402; - KRFreq[34][27] = 401; - KRFreq[39][35] = 400; - KRFreq[30][88] = 399; - KRFreq[32][92] = 398; - KRFreq[32][49] = 397; - KRFreq[24][61] = 396; - KRFreq[18][74] = 395; - KRFreq[23][77] = 394; - KRFreq[23][50] = 393; - KRFreq[23][32] = 392; - KRFreq[23][36] = 391; - KRFreq[38][38] = 390; - KRFreq[29][86] = 389; - KRFreq[36][15] = 388; - KRFreq[31][50] = 387; - KRFreq[15][86] = 386; - KRFreq[39][13] = 385; - KRFreq[34][26] = 384; - KRFreq[19][34] = 383; - KRFreq[16][3] = 382; - KRFreq[26][93] = 381; - KRFreq[19][67] = 380; - KRFreq[24][72] = 379; - KRFreq[29][17] = 378; - KRFreq[23][24] = 377; - KRFreq[25][19] = 376; - KRFreq[18][65] = 375; - KRFreq[30][78] = 374; - KRFreq[27][52] = 373; - KRFreq[22][18] = 372; - KRFreq[16][38] = 371; - KRFreq[21][26] = 370; - KRFreq[34][20] = 369; - KRFreq[15][42] = 368; - KRFreq[16][71] = 367; - KRFreq[17][17] = 366; - KRFreq[24][71] = 365; - KRFreq[18][84] = 364; - KRFreq[15][40] = 363; - KRFreq[31][62] = 362; - KRFreq[15][8] = 361; - KRFreq[16][69] = 360; - KRFreq[29][79] = 359; - KRFreq[38][91] = 358; - KRFreq[31][92] = 357; - KRFreq[20][77] = 356; - KRFreq[3][16] = 355; - KRFreq[27][87] = 354; - KRFreq[16][25] = 353; - KRFreq[36][33] = 352; - KRFreq[37][76] = 351; - KRFreq[30][12] = 350; - KRFreq[26][75] = 349; - KRFreq[25][14] = 348; - KRFreq[32][26] = 347; - KRFreq[23][22] = 346; - KRFreq[20][90] = 345; - KRFreq[19][8] = 344; - KRFreq[38][41] = 343; - KRFreq[34][2] = 342; - KRFreq[39][4] = 341; - KRFreq[27][89] = 340; - KRFreq[28][41] = 339; - KRFreq[28][44] = 338; - KRFreq[24][92] = 337; - KRFreq[34][65] = 336; - KRFreq[39][14] = 335; - KRFreq[21][38] = 334; - KRFreq[19][31] = 333; - KRFreq[37][39] = 332; - KRFreq[33][41] = 331; - KRFreq[38][4] = 330; - KRFreq[23][80] = 329; - KRFreq[25][24] = 328; - KRFreq[37][17] = 327; - KRFreq[22][16] = 326; - KRFreq[22][46] = 325; - KRFreq[33][91] = 324; - KRFreq[24][89] = 323; - KRFreq[30][52] = 322; - KRFreq[29][38] = 321; - 
KRFreq[38][85] = 320; - KRFreq[15][12] = 319; - KRFreq[27][58] = 318; - KRFreq[29][52] = 317; - KRFreq[37][38] = 316; - KRFreq[34][41] = 315; - KRFreq[31][65] = 314; - KRFreq[29][53] = 313; - KRFreq[22][47] = 312; - KRFreq[22][19] = 311; - KRFreq[26][0] = 310; - KRFreq[37][86] = 309; - KRFreq[35][4] = 308; - KRFreq[36][54] = 307; - KRFreq[20][76] = 306; - KRFreq[30][9] = 305; - KRFreq[30][33] = 304; - KRFreq[23][17] = 303; - KRFreq[23][33] = 302; - KRFreq[38][52] = 301; - KRFreq[15][19] = 300; - KRFreq[28][45] = 299; - KRFreq[29][78] = 298; - KRFreq[23][15] = 297; - KRFreq[33][5] = 296; - KRFreq[17][40] = 295; - KRFreq[30][83] = 294; - KRFreq[18][1] = 293; - KRFreq[30][81] = 292; - KRFreq[19][40] = 291; - KRFreq[24][47] = 290; - KRFreq[17][56] = 289; - KRFreq[39][80] = 288; - KRFreq[30][46] = 287; - KRFreq[16][61] = 286; - KRFreq[26][78] = 285; - KRFreq[26][57] = 284; - KRFreq[20][46] = 283; - KRFreq[25][15] = 282; - KRFreq[25][91] = 281; - KRFreq[21][83] = 280; - KRFreq[30][77] = 279; - KRFreq[35][30] = 278; - KRFreq[30][34] = 277; - KRFreq[20][69] = 276; - KRFreq[35][10] = 275; - KRFreq[29][70] = 274; - KRFreq[22][50] = 273; - KRFreq[18][0] = 272; - KRFreq[22][64] = 271; - KRFreq[38][65] = 270; - KRFreq[22][70] = 269; - KRFreq[24][58] = 268; - KRFreq[19][66] = 267; - KRFreq[30][59] = 266; - KRFreq[37][14] = 265; - KRFreq[16][56] = 264; - KRFreq[29][85] = 263; - KRFreq[31][15] = 262; - KRFreq[36][84] = 261; - KRFreq[39][15] = 260; - KRFreq[39][90] = 259; - KRFreq[18][12] = 258; - KRFreq[21][93] = 257; - KRFreq[24][66] = 256; - KRFreq[27][90] = 255; - KRFreq[25][90] = 254; - KRFreq[22][24] = 253; - KRFreq[36][67] = 252; - KRFreq[33][90] = 251; - KRFreq[15][60] = 250; - KRFreq[23][85] = 249; - KRFreq[34][1] = 248; - KRFreq[39][37] = 247; - KRFreq[21][18] = 246; - KRFreq[34][4] = 245; - KRFreq[28][33] = 244; - KRFreq[15][13] = 243; - KRFreq[32][22] = 242; - KRFreq[30][76] = 241; - KRFreq[20][21] = 240; - KRFreq[38][66] = 239; - KRFreq[32][55] = 238; - KRFreq[32][89] 
= 237; - KRFreq[25][26] = 236; - KRFreq[16][80] = 235; - KRFreq[15][43] = 234; - KRFreq[38][54] = 233; - KRFreq[39][68] = 232; - KRFreq[22][88] = 231; - KRFreq[21][84] = 230; - KRFreq[21][17] = 229; - KRFreq[20][28] = 228; - KRFreq[32][1] = 227; - KRFreq[33][87] = 226; - KRFreq[38][71] = 225; - KRFreq[37][47] = 224; - KRFreq[18][77] = 223; - KRFreq[37][58] = 222; - KRFreq[34][74] = 221; - KRFreq[32][54] = 220; - KRFreq[27][33] = 219; - KRFreq[32][93] = 218; - KRFreq[23][51] = 217; - KRFreq[20][57] = 216; - KRFreq[22][37] = 215; - KRFreq[39][10] = 214; - KRFreq[39][17] = 213; - KRFreq[33][4] = 212; - KRFreq[32][84] = 211; - KRFreq[34][3] = 210; - KRFreq[28][27] = 209; - KRFreq[15][79] = 208; - KRFreq[34][21] = 207; - KRFreq[34][69] = 206; - KRFreq[21][62] = 205; - KRFreq[36][24] = 204; - KRFreq[16][89] = 203; - KRFreq[18][48] = 202; - KRFreq[38][15] = 201; - KRFreq[36][58] = 200; - KRFreq[21][56] = 199; - KRFreq[34][48] = 198; - KRFreq[21][15] = 197; - KRFreq[39][3] = 196; - KRFreq[16][44] = 195; - KRFreq[18][79] = 194; - KRFreq[25][13] = 193; - KRFreq[29][47] = 192; - KRFreq[38][88] = 191; - KRFreq[20][71] = 190; - KRFreq[16][58] = 189; - KRFreq[35][57] = 188; - KRFreq[29][30] = 187; - KRFreq[29][23] = 186; - KRFreq[34][93] = 185; - KRFreq[30][85] = 184; - KRFreq[15][80] = 183; - KRFreq[32][78] = 182; - KRFreq[37][82] = 181; - KRFreq[22][40] = 180; - KRFreq[21][69] = 179; - KRFreq[26][85] = 178; - KRFreq[31][31] = 177; - KRFreq[28][64] = 176; - KRFreq[38][13] = 175; - KRFreq[25][2] = 174; - KRFreq[22][34] = 173; - KRFreq[28][28] = 172; - KRFreq[24][91] = 171; - KRFreq[33][74] = 170; - KRFreq[29][40] = 169; - KRFreq[15][77] = 168; - KRFreq[32][80] = 167; - KRFreq[30][41] = 166; - KRFreq[23][30] = 165; - KRFreq[24][63] = 164; - KRFreq[30][53] = 163; - KRFreq[39][70] = 162; - KRFreq[23][61] = 161; - KRFreq[37][27] = 160; - KRFreq[16][55] = 159; - KRFreq[22][74] = 158; - KRFreq[26][50] = 157; - KRFreq[16][10] = 156; - KRFreq[34][63] = 155; - KRFreq[35][14] = 154; - 
KRFreq[17][7] = 153; - KRFreq[15][59] = 152; - KRFreq[27][23] = 151; - KRFreq[18][70] = 150; - KRFreq[32][56] = 149; - KRFreq[37][87] = 148; - KRFreq[17][61] = 147; - KRFreq[18][83] = 146; - KRFreq[23][86] = 145; - KRFreq[17][31] = 144; - KRFreq[23][83] = 143; - KRFreq[35][2] = 142; - KRFreq[18][64] = 141; - KRFreq[27][43] = 140; - KRFreq[32][42] = 139; - KRFreq[25][76] = 138; - KRFreq[19][85] = 137; - KRFreq[37][81] = 136; - KRFreq[38][83] = 135; - KRFreq[35][7] = 134; - KRFreq[16][51] = 133; - KRFreq[27][22] = 132; - KRFreq[16][76] = 131; - KRFreq[22][4] = 130; - KRFreq[38][84] = 129; - KRFreq[17][83] = 128; - KRFreq[24][46] = 127; - KRFreq[33][15] = 126; - KRFreq[20][48] = 125; - KRFreq[17][30] = 124; - KRFreq[30][93] = 123; - KRFreq[28][11] = 122; - KRFreq[28][30] = 121; - KRFreq[15][62] = 120; - KRFreq[17][87] = 119; - KRFreq[32][81] = 118; - KRFreq[23][37] = 117; - KRFreq[30][22] = 116; - KRFreq[32][66] = 115; - KRFreq[33][78] = 114; - KRFreq[21][4] = 113; - KRFreq[31][17] = 112; - KRFreq[39][61] = 111; - KRFreq[18][76] = 110; - KRFreq[15][85] = 109; - KRFreq[31][47] = 108; - KRFreq[19][57] = 107; - KRFreq[23][55] = 106; - KRFreq[27][29] = 105; - KRFreq[29][46] = 104; - KRFreq[33][0] = 103; - KRFreq[16][83] = 102; - KRFreq[39][78] = 101; - KRFreq[32][77] = 100; - KRFreq[36][25] = 99; - KRFreq[34][19] = 98; - KRFreq[38][49] = 97; - KRFreq[19][25] = 96; - KRFreq[23][53] = 95; - KRFreq[28][43] = 94; - KRFreq[31][44] = 93; - KRFreq[36][34] = 92; - KRFreq[16][34] = 91; - KRFreq[35][1] = 90; - KRFreq[19][87] = 89; - KRFreq[18][53] = 88; - KRFreq[29][54] = 87; - KRFreq[22][41] = 86; - KRFreq[38][18] = 85; - KRFreq[22][2] = 84; - KRFreq[20][3] = 83; - KRFreq[39][69] = 82; - KRFreq[30][29] = 81; - KRFreq[28][19] = 80; - KRFreq[29][90] = 79; - KRFreq[17][86] = 78; - KRFreq[15][9] = 77; - KRFreq[39][73] = 76; - KRFreq[15][37] = 75; - KRFreq[35][40] = 74; - KRFreq[33][77] = 73; - KRFreq[27][86] = 72; - KRFreq[36][79] = 71; - KRFreq[23][18] = 70; - KRFreq[34][87] = 69; - 
KRFreq[39][24] = 68; - KRFreq[26][8] = 67; - KRFreq[33][48] = 66; - KRFreq[39][30] = 65; - KRFreq[33][28] = 64; - KRFreq[16][67] = 63; - KRFreq[31][78] = 62; - KRFreq[32][23] = 61; - KRFreq[24][55] = 60; - KRFreq[30][68] = 59; - KRFreq[18][60] = 58; - KRFreq[15][17] = 57; - KRFreq[23][34] = 56; - KRFreq[20][49] = 55; - KRFreq[15][78] = 54; - KRFreq[24][14] = 53; - KRFreq[19][41] = 52; - KRFreq[31][55] = 51; - KRFreq[21][39] = 50; - KRFreq[35][9] = 49; - KRFreq[30][15] = 48; - KRFreq[20][52] = 47; - KRFreq[35][71] = 46; - KRFreq[20][7] = 45; - KRFreq[29][72] = 44; - KRFreq[37][77] = 43; - KRFreq[22][35] = 42; - KRFreq[20][61] = 41; - KRFreq[31][60] = 40; - KRFreq[20][93] = 39; - KRFreq[27][92] = 38; - KRFreq[28][16] = 37; - KRFreq[36][26] = 36; - KRFreq[18][89] = 35; - KRFreq[21][63] = 34; - KRFreq[22][52] = 33; - KRFreq[24][65] = 32; - KRFreq[31][8] = 31; - KRFreq[31][49] = 30; - KRFreq[33][30] = 29; - KRFreq[37][15] = 28; - KRFreq[18][18] = 27; - KRFreq[25][50] = 26; - KRFreq[29][20] = 25; - KRFreq[35][48] = 24; - KRFreq[38][75] = 23; - KRFreq[26][83] = 22; - KRFreq[21][87] = 21; - KRFreq[27][71] = 20; - KRFreq[32][91] = 19; - KRFreq[25][73] = 18; - KRFreq[16][84] = 17; - KRFreq[25][31] = 16; - KRFreq[17][90] = 15; - KRFreq[18][40] = 14; - KRFreq[17][77] = 13; - KRFreq[17][35] = 12; - KRFreq[23][52] = 11; - KRFreq[23][35] = 10; - KRFreq[16][5] = 9; - KRFreq[23][58] = 8; - KRFreq[19][60] = 7; - KRFreq[30][32] = 6; - KRFreq[38][34] = 5; - KRFreq[23][4] = 4; - KRFreq[23][1] = 3; - KRFreq[27][57] = 2; - KRFreq[39][38] = 1; - KRFreq[32][33] = 0; - JPFreq[3][74] = 600; - JPFreq[3][45] = 599; - JPFreq[3][3] = 598; - JPFreq[3][24] = 597; - JPFreq[3][30] = 596; - JPFreq[3][42] = 595; - JPFreq[3][46] = 594; - JPFreq[3][39] = 593; - JPFreq[3][11] = 592; - JPFreq[3][37] = 591; - JPFreq[3][38] = 590; - JPFreq[3][31] = 589; - JPFreq[3][41] = 588; - JPFreq[3][5] = 587; - JPFreq[3][10] = 586; - JPFreq[3][75] = 585; - JPFreq[3][65] = 584; - JPFreq[3][72] = 583; - JPFreq[37][91] = 
582; - JPFreq[0][27] = 581; - JPFreq[3][18] = 580; - JPFreq[3][22] = 579; - JPFreq[3][61] = 578; - JPFreq[3][14] = 577; - JPFreq[24][80] = 576; - JPFreq[4][82] = 575; - JPFreq[17][80] = 574; - JPFreq[30][44] = 573; - JPFreq[3][73] = 572; - JPFreq[3][64] = 571; - JPFreq[38][14] = 570; - JPFreq[33][70] = 569; - JPFreq[3][1] = 568; - JPFreq[3][16] = 567; - JPFreq[3][35] = 566; - JPFreq[3][40] = 565; - JPFreq[4][74] = 564; - JPFreq[4][24] = 563; - JPFreq[42][59] = 562; - JPFreq[3][7] = 561; - JPFreq[3][71] = 560; - JPFreq[3][12] = 559; - JPFreq[15][75] = 558; - JPFreq[3][20] = 557; - JPFreq[4][39] = 556; - JPFreq[34][69] = 555; - JPFreq[3][28] = 554; - JPFreq[35][24] = 553; - JPFreq[3][82] = 552; - JPFreq[28][47] = 551; - JPFreq[3][67] = 550; - JPFreq[37][16] = 549; - JPFreq[26][93] = 548; - JPFreq[4][1] = 547; - JPFreq[26][85] = 546; - JPFreq[31][14] = 545; - JPFreq[4][3] = 544; - JPFreq[4][72] = 543; - JPFreq[24][51] = 542; - JPFreq[27][51] = 541; - JPFreq[27][49] = 540; - JPFreq[22][77] = 539; - JPFreq[27][10] = 538; - JPFreq[29][68] = 537; - JPFreq[20][35] = 536; - JPFreq[41][11] = 535; - JPFreq[24][70] = 534; - JPFreq[36][61] = 533; - JPFreq[31][23] = 532; - JPFreq[43][16] = 531; - JPFreq[23][68] = 530; - JPFreq[32][15] = 529; - JPFreq[3][32] = 528; - JPFreq[19][53] = 527; - JPFreq[40][83] = 526; - JPFreq[4][14] = 525; - JPFreq[36][9] = 524; - JPFreq[4][73] = 523; - JPFreq[23][10] = 522; - JPFreq[3][63] = 521; - JPFreq[39][14] = 520; - JPFreq[3][78] = 519; - JPFreq[33][47] = 518; - JPFreq[21][39] = 517; - JPFreq[34][46] = 516; - JPFreq[36][75] = 515; - JPFreq[41][92] = 514; - JPFreq[37][93] = 513; - JPFreq[4][34] = 512; - JPFreq[15][86] = 511; - JPFreq[46][1] = 510; - JPFreq[37][65] = 509; - JPFreq[3][62] = 508; - JPFreq[32][73] = 507; - JPFreq[21][65] = 506; - JPFreq[29][75] = 505; - JPFreq[26][51] = 504; - JPFreq[3][34] = 503; - JPFreq[4][10] = 502; - JPFreq[30][22] = 501; - JPFreq[35][73] = 500; - JPFreq[17][82] = 499; - JPFreq[45][8] = 498; - JPFreq[27][73] = 
497; - JPFreq[18][55] = 496; - JPFreq[25][2] = 495; - JPFreq[3][26] = 494; - JPFreq[45][46] = 493; - JPFreq[4][22] = 492; - JPFreq[4][40] = 491; - JPFreq[18][10] = 490; - JPFreq[32][9] = 489; - JPFreq[26][49] = 488; - JPFreq[3][47] = 487; - JPFreq[24][65] = 486; - JPFreq[4][76] = 485; - JPFreq[43][67] = 484; - JPFreq[3][9] = 483; - JPFreq[41][37] = 482; - JPFreq[33][68] = 481; - JPFreq[43][31] = 480; - JPFreq[19][55] = 479; - JPFreq[4][30] = 478; - JPFreq[27][33] = 477; - JPFreq[16][62] = 476; - JPFreq[36][35] = 475; - JPFreq[37][15] = 474; - JPFreq[27][70] = 473; - JPFreq[22][71] = 472; - JPFreq[33][45] = 471; - JPFreq[31][78] = 470; - JPFreq[43][59] = 469; - JPFreq[32][19] = 468; - JPFreq[17][28] = 467; - JPFreq[40][28] = 466; - JPFreq[20][93] = 465; - JPFreq[18][15] = 464; - JPFreq[4][23] = 463; - JPFreq[3][23] = 462; - JPFreq[26][64] = 461; - JPFreq[44][92] = 460; - JPFreq[17][27] = 459; - JPFreq[3][56] = 458; - JPFreq[25][38] = 457; - JPFreq[23][31] = 456; - JPFreq[35][43] = 455; - JPFreq[4][54] = 454; - JPFreq[35][19] = 453; - JPFreq[22][47] = 452; - JPFreq[42][0] = 451; - JPFreq[23][28] = 450; - JPFreq[46][33] = 449; - JPFreq[36][85] = 448; - JPFreq[31][12] = 447; - JPFreq[3][76] = 446; - JPFreq[4][75] = 445; - JPFreq[36][56] = 444; - JPFreq[4][64] = 443; - JPFreq[25][77] = 442; - JPFreq[15][52] = 441; - JPFreq[33][73] = 440; - JPFreq[3][55] = 439; - JPFreq[43][82] = 438; - JPFreq[27][82] = 437; - JPFreq[20][3] = 436; - JPFreq[40][51] = 435; - JPFreq[3][17] = 434; - JPFreq[27][71] = 433; - JPFreq[4][52] = 432; - JPFreq[44][48] = 431; - JPFreq[27][2] = 430; - JPFreq[17][39] = 429; - JPFreq[31][8] = 428; - JPFreq[44][54] = 427; - JPFreq[43][18] = 426; - JPFreq[43][77] = 425; - JPFreq[4][61] = 424; - JPFreq[19][91] = 423; - JPFreq[31][13] = 422; - JPFreq[44][71] = 421; - JPFreq[20][0] = 420; - JPFreq[23][87] = 419; - JPFreq[21][14] = 418; - JPFreq[29][13] = 417; - JPFreq[3][58] = 416; - JPFreq[26][18] = 415; - JPFreq[4][47] = 414; - JPFreq[4][18] = 413; - 
JPFreq[3][53] = 412; - JPFreq[26][92] = 411; - JPFreq[21][7] = 410; - JPFreq[4][37] = 409; - JPFreq[4][63] = 408; - JPFreq[36][51] = 407; - JPFreq[4][32] = 406; - JPFreq[28][73] = 405; - JPFreq[4][50] = 404; - JPFreq[41][60] = 403; - JPFreq[23][1] = 402; - JPFreq[36][92] = 401; - JPFreq[15][41] = 400; - JPFreq[21][71] = 399; - JPFreq[41][30] = 398; - JPFreq[32][76] = 397; - JPFreq[17][34] = 396; - JPFreq[26][15] = 395; - JPFreq[26][25] = 394; - JPFreq[31][77] = 393; - JPFreq[31][3] = 392; - JPFreq[46][34] = 391; - JPFreq[27][84] = 390; - JPFreq[23][8] = 389; - JPFreq[16][0] = 388; - JPFreq[28][80] = 387; - JPFreq[26][54] = 386; - JPFreq[33][18] = 385; - JPFreq[31][20] = 384; - JPFreq[31][62] = 383; - JPFreq[30][41] = 382; - JPFreq[33][30] = 381; - JPFreq[45][45] = 380; - JPFreq[37][82] = 379; - JPFreq[15][33] = 378; - JPFreq[20][12] = 377; - JPFreq[18][5] = 376; - JPFreq[28][86] = 375; - JPFreq[30][19] = 374; - JPFreq[42][43] = 373; - JPFreq[36][31] = 372; - JPFreq[17][93] = 371; - JPFreq[4][15] = 370; - JPFreq[21][20] = 369; - JPFreq[23][21] = 368; - JPFreq[28][72] = 367; - JPFreq[4][20] = 366; - JPFreq[26][55] = 365; - JPFreq[21][5] = 364; - JPFreq[19][16] = 363; - JPFreq[23][64] = 362; - JPFreq[40][59] = 361; - JPFreq[37][26] = 360; - JPFreq[26][56] = 359; - JPFreq[4][12] = 358; - JPFreq[33][71] = 357; - JPFreq[32][39] = 356; - JPFreq[38][40] = 355; - JPFreq[22][74] = 354; - JPFreq[3][25] = 353; - JPFreq[15][48] = 352; - JPFreq[41][82] = 351; - JPFreq[41][9] = 350; - JPFreq[25][48] = 349; - JPFreq[31][71] = 348; - JPFreq[43][29] = 347; - JPFreq[26][80] = 346; - JPFreq[4][5] = 345; - JPFreq[18][71] = 344; - JPFreq[29][0] = 343; - JPFreq[43][43] = 342; - JPFreq[23][81] = 341; - JPFreq[4][42] = 340; - JPFreq[44][28] = 339; - JPFreq[23][93] = 338; - JPFreq[17][81] = 337; - JPFreq[25][25] = 336; - JPFreq[41][23] = 335; - JPFreq[34][35] = 334; - JPFreq[4][53] = 333; - JPFreq[28][36] = 332; - JPFreq[4][41] = 331; - JPFreq[25][60] = 330; - JPFreq[23][20] = 329; - 
JPFreq[3][43] = 328; - JPFreq[24][79] = 327; - JPFreq[29][41] = 326; - JPFreq[30][83] = 325; - JPFreq[3][50] = 324; - JPFreq[22][18] = 323; - JPFreq[18][3] = 322; - JPFreq[39][30] = 321; - JPFreq[4][28] = 320; - JPFreq[21][64] = 319; - JPFreq[4][68] = 318; - JPFreq[17][71] = 317; - JPFreq[27][0] = 316; - JPFreq[39][28] = 315; - JPFreq[30][13] = 314; - JPFreq[36][70] = 313; - JPFreq[20][82] = 312; - JPFreq[33][38] = 311; - JPFreq[44][87] = 310; - JPFreq[34][45] = 309; - JPFreq[4][26] = 308; - JPFreq[24][44] = 307; - JPFreq[38][67] = 306; - JPFreq[38][6] = 305; - JPFreq[30][68] = 304; - JPFreq[15][89] = 303; - JPFreq[24][93] = 302; - JPFreq[40][41] = 301; - JPFreq[38][3] = 300; - JPFreq[28][23] = 299; - JPFreq[26][17] = 298; - JPFreq[4][38] = 297; - JPFreq[22][78] = 296; - JPFreq[15][37] = 295; - JPFreq[25][85] = 294; - JPFreq[4][9] = 293; - JPFreq[4][7] = 292; - JPFreq[27][53] = 291; - JPFreq[39][29] = 290; - JPFreq[41][43] = 289; - JPFreq[25][62] = 288; - JPFreq[4][48] = 287; - JPFreq[28][28] = 286; - JPFreq[21][40] = 285; - JPFreq[36][73] = 284; - JPFreq[26][39] = 283; - JPFreq[22][54] = 282; - JPFreq[33][5] = 281; - JPFreq[19][21] = 280; - JPFreq[46][31] = 279; - JPFreq[20][64] = 278; - JPFreq[26][63] = 277; - JPFreq[22][23] = 276; - JPFreq[25][81] = 275; - JPFreq[4][62] = 274; - JPFreq[37][31] = 273; - JPFreq[40][52] = 272; - JPFreq[29][79] = 271; - JPFreq[41][48] = 270; - JPFreq[31][57] = 269; - JPFreq[32][92] = 268; - JPFreq[36][36] = 267; - JPFreq[27][7] = 266; - JPFreq[35][29] = 265; - JPFreq[37][34] = 264; - JPFreq[34][42] = 263; - JPFreq[27][15] = 262; - JPFreq[33][27] = 261; - JPFreq[31][38] = 260; - JPFreq[19][79] = 259; - JPFreq[4][31] = 258; - JPFreq[4][66] = 257; - JPFreq[17][32] = 256; - JPFreq[26][67] = 255; - JPFreq[16][30] = 254; - JPFreq[26][46] = 253; - JPFreq[24][26] = 252; - JPFreq[35][10] = 251; - JPFreq[18][37] = 250; - JPFreq[3][19] = 249; - JPFreq[33][69] = 248; - JPFreq[31][9] = 247; - JPFreq[45][29] = 246; - JPFreq[3][15] = 245; - 
JPFreq[18][54] = 244; - JPFreq[3][44] = 243; - JPFreq[31][29] = 242; - JPFreq[18][45] = 241; - JPFreq[38][28] = 240; - JPFreq[24][12] = 239; - JPFreq[35][82] = 238; - JPFreq[17][43] = 237; - JPFreq[28][9] = 236; - JPFreq[23][25] = 235; - JPFreq[44][37] = 234; - JPFreq[23][75] = 233; - JPFreq[23][92] = 232; - JPFreq[0][24] = 231; - JPFreq[19][74] = 230; - JPFreq[45][32] = 229; - JPFreq[16][72] = 228; - JPFreq[16][93] = 227; - JPFreq[45][13] = 226; - JPFreq[24][8] = 225; - JPFreq[25][47] = 224; - JPFreq[28][26] = 223; - JPFreq[43][81] = 222; - JPFreq[32][71] = 221; - JPFreq[18][41] = 220; - JPFreq[26][62] = 219; - JPFreq[41][24] = 218; - JPFreq[40][11] = 217; - JPFreq[43][57] = 216; - JPFreq[34][53] = 215; - JPFreq[20][32] = 214; - JPFreq[34][43] = 213; - JPFreq[41][91] = 212; - JPFreq[29][57] = 211; - JPFreq[15][43] = 210; - JPFreq[22][89] = 209; - JPFreq[33][83] = 208; - JPFreq[43][20] = 207; - JPFreq[25][58] = 206; - JPFreq[30][30] = 205; - JPFreq[4][56] = 204; - JPFreq[17][64] = 203; - JPFreq[23][0] = 202; - JPFreq[44][12] = 201; - JPFreq[25][37] = 200; - JPFreq[35][13] = 199; - JPFreq[20][30] = 198; - JPFreq[21][84] = 197; - JPFreq[29][14] = 196; - JPFreq[30][5] = 195; - JPFreq[37][2] = 194; - JPFreq[4][78] = 193; - JPFreq[29][78] = 192; - JPFreq[29][84] = 191; - JPFreq[32][86] = 190; - JPFreq[20][68] = 189; - JPFreq[30][39] = 188; - JPFreq[15][69] = 187; - JPFreq[4][60] = 186; - JPFreq[20][61] = 185; - JPFreq[41][67] = 184; - JPFreq[16][35] = 183; - JPFreq[36][57] = 182; - JPFreq[39][80] = 181; - JPFreq[4][59] = 180; - JPFreq[4][44] = 179; - JPFreq[40][54] = 178; - JPFreq[30][8] = 177; - JPFreq[44][30] = 176; - JPFreq[31][93] = 175; - JPFreq[31][47] = 174; - JPFreq[16][70] = 173; - JPFreq[21][0] = 172; - JPFreq[17][35] = 171; - JPFreq[21][67] = 170; - JPFreq[44][18] = 169; - JPFreq[36][29] = 168; - JPFreq[18][67] = 167; - JPFreq[24][28] = 166; - JPFreq[36][24] = 165; - JPFreq[23][5] = 164; - JPFreq[31][65] = 163; - JPFreq[26][59] = 162; - JPFreq[28][2] = 161; - 
JPFreq[39][69] = 160; - JPFreq[42][40] = 159; - JPFreq[37][80] = 158; - JPFreq[15][66] = 157; - JPFreq[34][38] = 156; - JPFreq[28][48] = 155; - JPFreq[37][77] = 154; - JPFreq[29][34] = 153; - JPFreq[33][12] = 152; - JPFreq[4][65] = 151; - JPFreq[30][31] = 150; - JPFreq[27][92] = 149; - JPFreq[4][2] = 148; - JPFreq[4][51] = 147; - JPFreq[23][77] = 146; - JPFreq[4][35] = 145; - JPFreq[3][13] = 144; - JPFreq[26][26] = 143; - JPFreq[44][4] = 142; - JPFreq[39][53] = 141; - JPFreq[20][11] = 140; - JPFreq[40][33] = 139; - JPFreq[45][7] = 138; - JPFreq[4][70] = 137; - JPFreq[3][49] = 136; - JPFreq[20][59] = 135; - JPFreq[21][12] = 134; - JPFreq[33][53] = 133; - JPFreq[20][14] = 132; - JPFreq[37][18] = 131; - JPFreq[18][17] = 130; - JPFreq[36][23] = 129; - JPFreq[18][57] = 128; - JPFreq[26][74] = 127; - JPFreq[35][2] = 126; - JPFreq[38][58] = 125; - JPFreq[34][68] = 124; - JPFreq[29][81] = 123; - JPFreq[20][69] = 122; - JPFreq[39][86] = 121; - JPFreq[4][16] = 120; - JPFreq[16][49] = 119; - JPFreq[15][72] = 118; - JPFreq[26][35] = 117; - JPFreq[32][14] = 116; - JPFreq[40][90] = 115; - JPFreq[33][79] = 114; - JPFreq[35][4] = 113; - JPFreq[23][33] = 112; - JPFreq[19][19] = 111; - JPFreq[31][41] = 110; - JPFreq[44][1] = 109; - JPFreq[22][56] = 108; - JPFreq[31][27] = 107; - JPFreq[32][18] = 106; - JPFreq[27][32] = 105; - JPFreq[37][39] = 104; - JPFreq[42][11] = 103; - JPFreq[29][71] = 102; - JPFreq[32][58] = 101; - JPFreq[46][10] = 100; - JPFreq[17][30] = 99; - JPFreq[38][15] = 98; - JPFreq[29][60] = 97; - JPFreq[4][11] = 96; - JPFreq[38][31] = 95; - JPFreq[40][79] = 94; - JPFreq[28][49] = 93; - JPFreq[28][84] = 92; - JPFreq[26][77] = 91; - JPFreq[22][32] = 90; - JPFreq[33][17] = 89; - JPFreq[23][18] = 88; - JPFreq[32][64] = 87; - JPFreq[4][6] = 86; - JPFreq[33][51] = 85; - JPFreq[44][77] = 84; - JPFreq[29][5] = 83; - JPFreq[46][25] = 82; - JPFreq[19][58] = 81; - JPFreq[4][46] = 80; - JPFreq[15][71] = 79; - JPFreq[18][58] = 78; - JPFreq[26][45] = 77; - JPFreq[45][66] = 76; - 
JPFreq[34][10] = 75; - JPFreq[19][37] = 74; - JPFreq[33][65] = 73; - JPFreq[44][52] = 72; - JPFreq[16][38] = 71; - JPFreq[36][46] = 70; - JPFreq[20][26] = 69; - JPFreq[30][37] = 68; - JPFreq[4][58] = 67; - JPFreq[43][2] = 66; - JPFreq[30][18] = 65; - JPFreq[19][35] = 64; - JPFreq[15][68] = 63; - JPFreq[3][36] = 62; - JPFreq[35][40] = 61; - JPFreq[36][32] = 60; - JPFreq[37][14] = 59; - JPFreq[17][11] = 58; - JPFreq[19][78] = 57; - JPFreq[37][11] = 56; - JPFreq[28][63] = 55; - JPFreq[29][61] = 54; - JPFreq[33][3] = 53; - JPFreq[41][52] = 52; - JPFreq[33][63] = 51; - JPFreq[22][41] = 50; - JPFreq[4][19] = 49; - JPFreq[32][41] = 48; - JPFreq[24][4] = 47; - JPFreq[31][28] = 46; - JPFreq[43][30] = 45; - JPFreq[17][3] = 44; - JPFreq[43][70] = 43; - JPFreq[34][19] = 42; - JPFreq[20][77] = 41; - JPFreq[18][83] = 40; - JPFreq[17][15] = 39; - JPFreq[23][61] = 38; - JPFreq[40][27] = 37; - JPFreq[16][48] = 36; - JPFreq[39][78] = 35; - JPFreq[41][53] = 34; - JPFreq[40][91] = 33; - JPFreq[40][72] = 32; - JPFreq[18][52] = 31; - JPFreq[35][66] = 30; - JPFreq[39][93] = 29; - JPFreq[19][48] = 28; - JPFreq[26][36] = 27; - JPFreq[27][25] = 26; - JPFreq[42][71] = 25; - JPFreq[42][85] = 24; - JPFreq[26][48] = 23; - JPFreq[28][15] = 22; - JPFreq[3][66] = 21; - JPFreq[25][24] = 20; - JPFreq[27][43] = 19; - JPFreq[27][78] = 18; - JPFreq[45][43] = 17; - JPFreq[27][72] = 16; - JPFreq[40][29] = 15; - JPFreq[41][0] = 14; - JPFreq[19][57] = 13; - JPFreq[15][59] = 12; - JPFreq[29][29] = 11; - JPFreq[4][25] = 10; - JPFreq[21][42] = 9; - JPFreq[23][35] = 8; - JPFreq[33][1] = 7; - JPFreq[4][57] = 6; - JPFreq[17][60] = 5; - JPFreq[25][19] = 4; - JPFreq[22][65] = 3; - JPFreq[42][29] = 2; - JPFreq[27][66] = 1; - JPFreq[26][89] = 0; - } -} - -@SuppressWarnings("ALL") -class Encoding { - // Supported Encoding Types - static int GB2312 = 0; - static int GBK = 1; - static int GB18030 = 2; - static int HZ = 3; - static int BIG5 = 4; - static int CNS11643 = 5; - static int UTF8 = 6; - static int UTF8T = 7; - 
static int UTF8S = 8; - static int UNICODE = 9; - static int UNICODET = 10; - static int UNICODES = 11; - static int ISO2022CN = 12; - static int ISO2022CN_CNS = 13; - static int ISO2022CN_GB = 14; - static int EUC_KR = 15; - static int CP949 = 16; - static int ISO2022KR = 17; - static int JOHAB = 18; - static int SJIS = 19; - static int EUC_JP = 20; - static int ISO2022JP = 21; - static int ASCII = 22; - static int OTHER = 23; - static int TOTALTYPES = 24; - - public final static int SIMP = 0; - - public final static int TRAD = 1; - - // Names of the encodings as understood by Java - static String[] javaname; - - // Names of the encodings for human viewing - static String[] nicename; - - // Names of charsets as used in charset parameter of HTML Meta tag - static String[] htmlname; - - // Constructor - Encoding() { - javaname = new String[TOTALTYPES]; - nicename = new String[TOTALTYPES]; - htmlname = new String[TOTALTYPES]; - // Assign encoding names - javaname[GB2312] = "GB2312"; - javaname[GBK] = "GBK"; - javaname[GB18030] = "GB18030"; - javaname[HZ] = "ASCII"; // What to put here? 
Sun doesn't support HZ - javaname[ISO2022CN_GB] = "ISO2022CN_GB"; - javaname[BIG5] = "BIG5"; - javaname[CNS11643] = "EUC-TW"; - javaname[ISO2022CN_CNS] = "ISO2022CN_CNS"; - javaname[ISO2022CN] = "ISO2022CN"; - javaname[UTF8] = "UTF-8"; - javaname[UTF8T] = "UTF-8"; - javaname[UTF8S] = "UTF-8"; - javaname[UNICODE] = "Unicode"; - javaname[UNICODET] = "Unicode"; - javaname[UNICODES] = "Unicode"; - javaname[EUC_KR] = "EUC_KR"; - javaname[CP949] = "MS949"; - javaname[ISO2022KR] = "ISO2022KR"; - javaname[JOHAB] = "Johab"; - javaname[SJIS] = "SJIS"; - javaname[EUC_JP] = "EUC_JP"; - javaname[ISO2022JP] = "ISO2022JP"; - javaname[ASCII] = "ASCII"; - javaname[OTHER] = "ISO8859_1"; - // Assign encoding names - htmlname[GB2312] = "GB2312"; - htmlname[GBK] = "GBK"; - htmlname[GB18030] = "GB18030"; - htmlname[HZ] = "HZ-GB-2312"; - htmlname[ISO2022CN_GB] = "ISO-2022-CN-EXT"; - htmlname[BIG5] = "BIG5"; - htmlname[CNS11643] = "EUC-TW"; - htmlname[ISO2022CN_CNS] = "ISO-2022-CN-EXT"; - htmlname[ISO2022CN] = "ISO-2022-CN"; - htmlname[UTF8] = "UTF-8"; - htmlname[UTF8T] = "UTF-8"; - htmlname[UTF8S] = "UTF-8"; - htmlname[UNICODE] = "UTF-16"; - htmlname[UNICODET] = "UTF-16"; - htmlname[UNICODES] = "UTF-16"; - htmlname[EUC_KR] = "EUC-KR"; - htmlname[CP949] = "x-windows-949"; - htmlname[ISO2022KR] = "ISO-2022-KR"; - htmlname[JOHAB] = "x-Johab"; - htmlname[SJIS] = "Shift_JIS"; - htmlname[EUC_JP] = "EUC-JP"; - htmlname[ISO2022JP] = "ISO-2022-JP"; - htmlname[ASCII] = "ASCII"; - htmlname[OTHER] = "ISO8859-1"; - // Assign Human readable names - nicename[GB2312] = "GB-2312"; - nicename[GBK] = "GBK"; - nicename[GB18030] = "GB18030"; - nicename[HZ] = "HZ"; - nicename[ISO2022CN_GB] = "ISO2022CN-GB"; - nicename[BIG5] = "Big5"; - nicename[CNS11643] = "CNS11643"; - nicename[ISO2022CN_CNS] = "ISO2022CN-CNS"; - nicename[ISO2022CN] = "ISO2022 CN"; - nicename[UTF8] = "UTF-8"; - nicename[UTF8T] = "UTF-8 (Trad)"; - nicename[UTF8S] = "UTF-8 (Simp)"; - nicename[UNICODE] = "Unicode"; - nicename[UNICODET] = 
"Unicode (Trad)"; - nicename[UNICODES] = "Unicode (Simp)"; - nicename[EUC_KR] = "EUC-KR"; - nicename[CP949] = "CP949"; - nicename[ISO2022KR] = "ISO 2022 KR"; - nicename[JOHAB] = "Johab"; - nicename[SJIS] = "Shift-JIS"; - nicename[EUC_JP] = "EUC-JP"; - nicename[ISO2022JP] = "ISO 2022 JP"; - nicename[ASCII] = "ASCII"; - nicename[OTHER] = "OTHER"; - } - } \ No newline at end of file diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetDetector.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetDetector.java new file mode 100644 index 000000000..23c8c7d13 --- /dev/null +++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetDetector.java @@ -0,0 +1,568 @@ +/* GENERATED SOURCE. DO NOT MODIFY. */ +/** + * ****************************************************************************** + * Copyright (C) 2005-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + * ****************************************************************************** + */ +package io.legado.app.utils.icu4j; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + + +/** + * CharsetDetector provides a facility for detecting the + * charset or encoding of character data in an unknown format. + * The input data can either be from an input stream or an array of bytes. + * The result of the detection operation is a list of possibly matching + * charsets, or, for simple use, you can just ask for a Java Reader that + * will will work over the input data. + *

+ * Character set detection is at best an imprecise operation. The detection + * process will attempt to identify the charset that best matches the characteristics + * of the byte data, but the process is partly statistical in nature, and + * the results can not be guaranteed to always be correct. + *

+ * For best accuracy in charset detection, the input data should be primarily + * in a single language, and a minimum of a few hundred bytes worth of plain text + * in the language are needed. The detection process will attempt to + * ignore html or xml style markup that could otherwise obscure the content. + *

+ * + * @stable ICU 3.4 + * @hide All android.icu classes are currently hidden + */ +public class CharsetDetector { + +// Question: Should we have getters corresponding to the setters for input text +// and declared encoding? + +// A thought: If we were to create our own type of Java Reader, we could defer +// figuring out an actual charset for data that starts out with too much English +// only ASCII until the user actually read through to something that didn't look +// like 7 bit English. If nothing else ever appeared, we would never need to +// actually choose the "real" charset. All assuming that the application just +// wants the data, and doesn't care about a char set name. + + /** + * Constructor + * + * @stable ICU 3.4 + */ + public CharsetDetector() { + } + + /** + * Set the declared encoding for charset detection. + * The declared encoding of an input text is an encoding obtained + * from an http header or xml declaration or similar source that + * can be provided as additional information to the charset detector. + * A match between a declared encoding and a possible detected encoding + * will raise the quality of that detected encoding by a small delta, + * and will also appear as a "reason" for the match. + *

+ * A declared encoding that is incompatible with the input data being + * analyzed will not be added to the list of possible encodings. + * + * @param encoding The declared encoding + * @stable ICU 3.4 + */ + public CharsetDetector setDeclaredEncoding(String encoding) { + fDeclaredEncoding = encoding; + return this; + } + + /** + * Set the input text (byte) data whose charset is to be detected. + * + * @param in the input text of unknown encoding + * @return This CharsetDetector + * @stable ICU 3.4 + */ + public CharsetDetector setText(byte[] in) { + fRawInput = in; + fRawLength = in.length; + + return this; + } + + private static final int kBufSize = 8000; + + /** + * Set the input text (byte) data whose charset is to be detected. + *

+ * The input stream that supplies the character data must have markSupported() + * == true; the charset detection process will read a small amount of data, + * then return the stream to its original position via + * the InputStream.reset() operation. The exact amount that will + * be read depends on the characteristics of the data itself. + * + * @param in the input text of unknown encoding + * @return This CharsetDetector + * @stable ICU 3.4 + */ + + public CharsetDetector setText(InputStream in) throws IOException { + fInputStream = in; + fInputStream.mark(kBufSize); + fRawInput = new byte[kBufSize]; // Always make a new buffer because the + // previous one may have come from the caller, + // in which case we can't touch it. + fRawLength = 0; + int remainingLength = kBufSize; + while (remainingLength > 0) { + // read() may give data in smallish chunks, esp. for remote sources. Hence, this loop. + int bytesRead = fInputStream.read(fRawInput, fRawLength, remainingLength); + if (bytesRead <= 0) { + break; + } + fRawLength += bytesRead; + remainingLength -= bytesRead; + } + fInputStream.reset(); + + return this; + } + + + /** + * Return the charset that best matches the supplied input data. + *

+ * Note though, that because the detection + * only looks at the start of the input data, + * there is a possibility that the returned charset will fail to handle + * the full set of input data. + * + * Returns null, rather than raising an exception, if no charset appears to match the data. + *

+ * + * @return a CharsetMatch object representing the best matching charset, or + * null if there are no matches. + * @stable ICU 3.4 + */ + public CharsetMatch detect() { +// TODO: A better implementation would be to copy the detect loop from +// detectAll(), and cut it short as soon as a match with a high confidence +// is found. This is something to be done later, after things are otherwise +// working. + CharsetMatch matches[] = detectAll(); + + if (matches == null || matches.length == 0) { + return null; + } + + return matches[0]; + } + + /** + * Return an array of all charsets that appear to be plausible + * matches with the input data. The array is ordered with the + * best quality match first. + *

+ * The returned array is empty if no charset appears to match the data. + *

+ * + * @return An array of CharsetMatch objects representing possibly matching charsets. + * @stable ICU 3.4 + */ + public CharsetMatch[] detectAll() { + ArrayList matches = new ArrayList(); + + MungeInput(); // Strip html markup, collect byte stats. + + // Iterate over all possible charsets, remember all that + // give a match quality > 0. + for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) { + CSRecognizerInfo rcinfo = ALL_CS_RECOGNIZERS.get(i); + boolean active = (fEnabledRecognizers != null) ? fEnabledRecognizers[i] : rcinfo.isDefaultEnabled; + if (active) { + CharsetMatch m = rcinfo.recognizer.match(this); + if (m != null) { + matches.add(m); + } + } + } + Collections.sort(matches); // CharsetMatch compares on confidence + Collections.reverse(matches); // Put best match first. + CharsetMatch[] resultArray = new CharsetMatch[matches.size()]; + resultArray = matches.toArray(resultArray); + return resultArray; + } + + + /** + * Autodetect the charset of an inputStream, and return a Java Reader + * to access the converted input data. + *

+ * This is a convenience method that is equivalent to + * this.setDeclaredEncoding(declaredEncoding).setText(in).detect().getReader(); + *

+ * For the input stream that supplies the character data, markSupported() + * must be true; the charset detection will read a small amount of data, + * then return the stream to its original position via + * the InputStream.reset() operation. The exact amount that will + * be read depends on the characteristics of the data itself. + *

+ * Raise an exception if no charsets appear to match the input data. + * + * @param in The source of the byte data in the unknown charset. + * @param declaredEncoding A declared encoding for the data, if available, + * or null or an empty string if none is available. + * @stable ICU 3.4 + */ + public Reader getReader(InputStream in, String declaredEncoding) { + fDeclaredEncoding = declaredEncoding; + + try { + setText(in); + + CharsetMatch match = detect(); + + if (match == null) { + return null; + } + + return match.getReader(); + } catch (IOException e) { + return null; + } + } + + /** + * Autodetect the charset of an inputStream, and return a String + * containing the converted input data. + *

+ * This is a convenience method that is equivalent to + * this.setDeclaredEncoding(declaredEncoding).setText(in).detect().getString(); + *

+ * Raise an exception if no charsets appear to match the input data. + * + * @param in The source of the byte data in the unknown charset. + * @param declaredEncoding A declared encoding for the data, if available, + * or null or an empty string if none is available. + * @stable ICU 3.4 + */ + public String getString(byte[] in, String declaredEncoding) { + fDeclaredEncoding = declaredEncoding; + + try { + setText(in); + + CharsetMatch match = detect(); + + if (match == null) { + return null; + } + + return match.getString(-1); + } catch (IOException e) { + return null; + } + } + + + /** + * Get the names of all charsets supported by CharsetDetector class. + *

+     * Note: Multiple different charset encodings in a same family may use
+     * a single shared name in this implementation. For example, this method returns
+     * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+     * (Windows Latin 1). However, actual detection result could be "windows-1252"
+     * when the input data matches Latin 1 code points with any points only available
+     * in "windows-1252".
+     *
+     * @return an array of the names of all charsets supported by
+     * CharsetDetector class.
+     * @stable ICU 3.4
+     */
+    public static String[] getAllDetectableCharsets() {
+        String[] names = new String[ALL_CS_RECOGNIZERS.size()];
+        int next = 0;
+        for (CSRecognizerInfo info : ALL_CS_RECOGNIZERS) {
+            names[next++] = info.recognizer.getName();
+        }
+        return names;
+    }
+
+    /**
+     * Report whether markup filtering of the input is currently switched on.
+     *
+     * @return true if input text will be filtered.
+     * @stable ICU 3.4
+     * @see #enableInputFilter
+     */
+    public boolean inputFilterEnabled() {
+        return fStripTags;
+    }
+
+    /**
+     * Switch filtering of input text on or off. While filtering is on,
+     * text within angle brackets ("<" and ">") is removed
+     * before detection.
+     *
+     * @param filter <code>true</code> to enable input text filtering.
+     * @return The previous setting.
+     * @stable ICU 3.4
+     */
+    public boolean enableInputFilter(boolean filter) {
+        final boolean wasEnabled = fStripTags;
+        fStripTags = filter;
+        return wasEnabled;
+    }
+
+    /*
+     * MungeInput - prepare the raw input for the recognizers: optionally drop
+     * what looks like html/xml markup, then tally per-byte statistics.
+     */
+    private void MungeInput() {
+        int openTags = 0;
+        int badTags = 0;
+
+        //
+        // html / xml markup stripping.
+        // Quick and dirty, not 100% accurate, but hopefully good enough, statistically:
+        // everything between '<' and '>' is discarded.
+        // Total '<' and nested (illegal) '<' are counted so that we can guess
+        // afterwards whether the input was really marked up at all.
+        //
+        if (fStripTags) {
+            boolean insideTag = false;
+            int out = 0;
+            for (int in = 0; in < fRawLength && out < fInputBytes.length; in++) {
+                final byte current = fRawInput[in];
+                if (current == (byte) '<') {
+                    if (insideTag) {
+                        badTags++;
+                    }
+                    insideTag = true;
+                    openTags++;
+                }
+
+                if (!insideTag) {
+                    fInputBytes[out++] = current;
+                }
+
+                if (current == (byte) '>') {
+                    insideTag = false;
+                }
+            }
+
+            fInputLen = out;
+        }
+
+        //
+        // If the input does not look marked up, or is essentially nothing but
+        // markup, abandon the stripping: detection then works on the raw bytes
+        // (at most kBufSize of them).
+        //
+        final boolean notReallyMarkup = openTags < 5 || openTags / 5 < badTags;
+        final boolean almostOnlyMarkup = fInputLen < 100 && fRawLength > 600;
+        if (notReallyMarkup || almostOnlyMarkup) {
+            final int limit = Math.min(fRawLength, kBufSize);
+            int copied;
+            for (copied = 0; copied < limit; copied++) {
+                fInputBytes[copied] = fRawInput[copied];
+            }
+            fInputLen = copied;
+        }
+
+        //
+        // Tally up the byte occurrence statistics.
+        // These are available for use by the various detectors.
+        //
+        Arrays.fill(fByteStats, (short) 0);
+        for (int k = 0; k < fInputLen; k++) {
+            fByteStats[fInputBytes[k] & 0x00ff]++;
+        }
+
+        // Record whether any byte value in 0x80 - 0x9F occurred in the input.
+        fC1Bytes = false;
+        for (int value = 0x80; value <= 0x9F; value++) {
+            if (fByteStats[value] != 0) {
+                fC1Bytes = true;
+                break;
+            }
+        }
+    }
+
+    /*
+     * The following items are accessed by individual CharsetRecognizers during
+     * the recognition process
+     *
+     */
+    // The text to be checked. Markup will have been removed if appropriate.
+    byte[] fInputBytes = new byte[kBufSize];
+
+    // Length of the byte data in fInputBytes.
+    int fInputLen;
+
+    // Byte frequency statistics for the input text, indexed by unsigned byte value.
+    // MungeInput() stores the number of occurrences of each value, so zero
+    // really means zero occurrences.
+    short[] fByteStats = new short[256];
+
+    // True if any bytes in the range 0x80 - 0x9F are in the input.
+    boolean fC1Bytes = false;
+
+    // Encoding declared by the caller; assigned by getReader()/getString().
+    String fDeclaredEncoding;
+
+
+    byte[] fRawInput;   // Original, untouched input bytes.
+                        //  If user gave us a byte array, this is it.
+                        //  If user gave us a stream, it's read to a
+                        //  buffer here.
+    int fRawLength;     // Length of data in fRawInput array.
+
+    InputStream fInputStream;  // User's input stream, or null if the user
+                               //   gave us a byte array.
+
+    //
+    //  Stuff private to CharsetDetector
+    //
+
+    // If true, MungeInput() strips markup from the input text before detection.
+    private boolean fStripTags = false;
+
+    // If not null, the active set of charset recognizers has been changed
+    // from the default. The array index corresponds to ALL_CS_RECOGNIZERS.
+    // See setDetectableCharset().
+    private boolean[] fEnabledRecognizers;
+
+    /**
+     * Registry entry pairing a recognizer with its default on/off state.
+     * Instances are shared through the static recognizer list, so both
+     * fields are final and the entry is immutable after construction.
+     */
+    private static class CSRecognizerInfo {
+        final CharsetRecognizer recognizer;
+        final boolean isDefaultEnabled;
+
+        CSRecognizerInfo(CharsetRecognizer recognizer, boolean isDefaultEnabled) {
+            this.recognizer = recognizer;
+            this.isDefaultEnabled = isDefaultEnabled;
+        }
+    }
+
+    /*
+     * List of recognizers for all charsets known to the implementation.
+     */
+    // Typed list: callers read .recognizer / .isDefaultEnabled straight off get(i),
+    // which a raw List (element type Object) does not allow.
+    // The order is significant: fEnabledRecognizers is indexed in parallel with it.
+    private static final List<CSRecognizerInfo> ALL_CS_RECOGNIZERS;
+
+    static {
+        List<CSRecognizerInfo> list = new ArrayList<CSRecognizerInfo>();
+
+        list.add(new CSRecognizerInfo(new CharsetRecog_UTF8(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_BE(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_LE(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_BE(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_LE(), true));
+
+        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_sjis(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022JP(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022CN(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022KR(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_gb_18030(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_jp(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_kr(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_big5(), true));
+
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_1(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_2(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_7_el(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_he(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1251(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1256(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_KOI8_R(), true));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_9_tr(), true));
+
+        // IBM 420/424 recognizers are disabled by default
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_rtl(), false));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_ltr(), false));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_rtl(), false));
+        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_ltr(), false));
+
+        ALL_CS_RECOGNIZERS = Collections.unmodifiableList(list);
+    }
+
+    /**
+     * Get the names of charsets that can be recognized by this CharsetDetector instance.
+     *
+     * @return an array of the names of charsets that can be recognized by this CharsetDetector
+     * instance.
+     *

+     * {@literal @}internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public String[] getDetectableCharsets() {
+        // Typed as List<String> so that toArray(String[]) really yields a String[].
+        List<String> csnames = new ArrayList<String>(ALL_CS_RECOGNIZERS.size());
+        for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) {
+            CSRecognizerInfo rcinfo = ALL_CS_RECOGNIZERS.get(i);
+            // Per-instance overrides win; otherwise fall back to the recognizer's default.
+            boolean active = (fEnabledRecognizers == null) ? rcinfo.isDefaultEnabled : fEnabledRecognizers[i];
+            if (active) {
+                csnames.add(rcinfo.recognizer.getName());
+            }
+        }
+        return csnames.toArray(new String[csnames.size()]);
+    }
+
+    /**
+     * Enable or disable individual charset encoding.
+     * A name of charset encoding must be included in the names returned by
+     * {@link #getAllDetectableCharsets()}.
+     *
+     * @param encoding the name of charset encoding.
+     * @param enabled  <code>true</code> to enable, or <code>false</code> to disable the
+     *                 charset encoding.
+     * @return A reference to this <code>CharsetDetector</code>.
+     * @throws IllegalArgumentException when the name of charset encoding is
+     *                                  not supported.
+     *

+     * {@literal @}internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public CharsetDetector setDetectableCharset(String encoding, boolean enabled) {
+        final int recognizerCount = ALL_CS_RECOGNIZERS.size();
+
+        // Locate the first recognizer registered under the requested name.
+        int targetIndex = -1;
+        boolean sameAsDefault = false;
+        for (int i = 0; i < recognizerCount && targetIndex < 0; i++) {
+            CSRecognizerInfo info = ALL_CS_RECOGNIZERS.get(i);
+            if (info.recognizer.getName().equals(encoding)) {
+                targetIndex = i;
+                sameAsDefault = (info.isDefaultEnabled == enabled);
+            }
+        }
+        if (targetIndex < 0) {
+            // No matching encoding found
+            throw new IllegalArgumentException("Invalid encoding: " + "\"" + encoding + "\"");
+        }
+
+        if (fEnabledRecognizers == null) {
+            if (sameAsDefault) {
+                // Still on pure defaults and the request matches them: nothing to record.
+                return this;
+            }
+            // First non-default request: materialize the override table from the defaults.
+            fEnabledRecognizers = new boolean[recognizerCount];
+            for (int i = 0; i < recognizerCount; i++) {
+                fEnabledRecognizers[i] = ALL_CS_RECOGNIZERS.get(i).isDefaultEnabled;
+            }
+        }
+
+        fEnabledRecognizers[targetIndex] = enabled;
+        return this;
+    }
+}
diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetMatch.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetMatch.java
new file mode 100644
index 000000000..0c98e4937
--- /dev/null
+++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetMatch.java
@@ -0,0 +1,239 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+/**
+ * ******************************************************************************
+ * Copyright (C) 2005-2012, International Business Machines Corporation and *
+ * others. All Rights Reserved.
* + * ****************************************************************************** + */ +package io.legado.app.utils.icu4j; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; + + +/** + * This class represents a charset that has been identified by a CharsetDetector + * as a possible encoding for a set of input data. From an instance of this + * class, you can ask for a confidence level in the charset identification, + * or for Java Reader or String to access the original byte data in Unicode form. + *

+ * Instances of this class are created only by CharsetDetectors. + *

+ * Note: this class has a natural ordering that is inconsistent with equals.
+ * The natural ordering is based on the match confidence value.
+ *
+ * @stable ICU 3.4
+ * @hide All android.icu classes are currently hidden
+ */
+public class CharsetMatch implements Comparable<CharsetMatch> {
+
+
+    /**
+     * Create a java.io.Reader for reading the Unicode character data corresponding
+     * to the original byte data supplied to the Charset detect operation.
+     * <p>
+     * CAUTION: if the source of the byte data was an InputStream, a Reader
+     * can be created for only one matching char set using this method. If more
+     * than one charset needs to be tried, the caller will need to reset
+     * the InputStream and create InputStreamReaders itself, based on the charset name.
+     *
+     * @return the Reader for the Unicode character data, or {@code null} if
+     *         resetting the stream or opening the reader fails with an
+     *         {@link IOException}.
+     * @stable ICU 3.4
+     */
+    public Reader getReader() {
+        InputStream inputStream = fInputStream;
+
+        if (inputStream == null) {
+            // Byte-array input: wrap only the valid part of the array.
+            inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength);
+        }
+
+        try {
+            inputStream.reset();
+            return new InputStreamReader(inputStream, getName());
+        } catch (IOException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Create a Java String from Unicode character data corresponding
+     * to the original byte data supplied to the Charset detect operation.
+     *
+     * @return a String created from the converted input data.
+     * @throws IOException if the data cannot be read or decoded.
+     * @stable ICU 3.4
+     */
+    public String getString() throws IOException {
+        return getString(-1);
+    }
+
+    /**
+     * Create a Java String from Unicode character data corresponding
+     * to the original byte data supplied to the Charset detect operation.
+     * The length of the returned string is limited to the specified size;
+     * the string will be truncated to this length if necessary. A limit value of
+     * zero or less is ignored, and treated as no limit.
+     * <p>
+     * The limit only applies when the data came from an input stream; data
+     * supplied as a byte array is always converted in full.
+     *
+     * @param maxLength The maximum length of the String to be created when the
+     *                  source of the data is an input stream, or -1 for
+     *                  unlimited length.
+     * @return a String created from the converted input data.
+     * @throws IOException if no reader can be opened for the detected charset,
+     *                     or if reading or decoding the data fails.
+     * @stable ICU 3.4
+     */
+    public String getString(int maxLength) throws IOException {
+        if (fInputStream != null) {
+            Reader reader = getReader();
+            if (reader == null) {
+                // getReader() reports failure with null; surface it as the declared
+                // checked exception instead of a NullPointerException below.
+                throw new IOException("Unable to open a reader for charset \"" + getName() + "\"");
+            }
+
+            // Zero or negative means "no limit", as documented above.
+            int remaining = maxLength <= 0 ? Integer.MAX_VALUE : maxLength;
+            StringBuilder sb = new StringBuilder();
+            char[] buffer = new char[1024];
+            try {
+                int charsRead;
+                // Stop as soon as the limit is reached: a read of length 0 returns 0,
+                // never -1, so looping on "remaining == 0" would never terminate.
+                while (remaining > 0
+                        && (charsRead = reader.read(buffer, 0, Math.min(remaining, buffer.length))) >= 0) {
+                    sb.append(buffer, 0, charsRead);
+                    remaining -= charsRead;
+                }
+            } finally {
+                // Close even when read() throws.
+                reader.close();
+            }
+            return sb.toString();
+        }
+
+        String name = getName();
+        /*
+         * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot
+         * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or '_ltr'
+         * should be stripped off before creating the string.
+         */
+        int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl");
+        if (startSuffix > 0) {
+            name = name.substring(0, startSuffix);
+        }
+        return new String(fRawInput, name);
+    }
+
+    /**
+     * Get an indication of the confidence in the charset detected.
+     * Confidence values range from 0-100, with larger numbers indicating
+     * a better match of the input data to the characteristics of the
+     * charset.
+     *
+     * @return the confidence in the charset match
+     * @stable ICU 3.4
+     */
+    public int getConfidence() {
+        return fConfidence;
+    }
+
+    /**
+     * Get the name of the detected charset.
+     * The name will be one that can be used with other APIs on the
+     * platform that accept charset names. It is the "Canonical name"
+     * as defined by the class java.nio.charset.Charset; for
+     * charsets that are registered with the IANA charset registry,
+     * this is the MIME-preferred registered name.
+     *
+     * @return The name of the charset.
+     * @stable ICU 3.4
+     * @see java.nio.charset.Charset
+     * @see InputStreamReader
+     */
+    public String getName() {
+        return fCharsetName;
+    }
+
+    /**
+     * Get the ISO code for the language of the detected charset.
+     *
+     * @return The ISO code for the language or <code>null</code> if the language cannot be determined.
+     * @stable ICU 3.4
+     */
+    public String getLanguage() {
+        return fLang;
+    }
+
+    /**
+     * Compare to other CharsetMatch objects.
+     * Comparison is based on the match confidence value, which
+     * allows CharsetDetector.detectAll() to order its results.
+     *
+     * @param other the CharsetMatch object to compare against.
+     * @return a negative integer, zero, or a positive integer as the
+     * confidence level of this <code>CharsetMatch</code>
+     * is less than, equal to, or greater than that of
+     * the argument.
+     * @stable ICU 4.4
+     */
+    @Override
+    public int compareTo(CharsetMatch other) {
+        if (this.fConfidence > other.fConfidence) {
+            return 1;
+        }
+        if (this.fConfidence < other.fConfidence) {
+            return -1;
+        }
+        return 0;
+    }
+
+    /*
+     *  Constructor.  Implementation internal.
+     *  Takes the charset name and language from the recognizer itself.
+     */
+    CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) {
+        this(det, rec, conf, rec.getName(), rec.getLanguage());
+    }
+
+    /*
+     *  Constructor.  Implementation internal.
+     *  Takes an explicit charset name and language.
+     */
+    CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) {
+        fConfidence = conf;
+
+        // The references to the original application input data must be copied out
+        //   of the charset recognizer to here, in case the application resets the
+        //   recognizer before using this CharsetMatch.
+        if (det.fInputStream == null) {
+            // We only want the existing input byte data if it came straight from the user,
+            //   not if it is just the head of a stream.
+            fRawInput = det.fRawInput;
+            fRawLength = det.fRawLength;
+        } else {
+            fRawInput = null;
+            fRawLength = 0;
+        }
+        fInputStream = det.fInputStream;
+        fCharsetName = csName;
+        fLang = lang;
+    }
+
+
+    //
+    //   Private Data
+    //
+    private final int fConfidence;
+    private final byte[] fRawInput;          // Original, untouched input bytes.
+                                             //  If user gave us a byte array, this is it.
+    private final int fRawLength;            // Length of data in fRawInput array.
+
+    private final InputStream fInputStream;  // User's input stream, or null if the user
+                                             //   gave us a byte array.
+
+    private final String fCharsetName;       // The name of the charset this CharsetMatch
+                                             //   represents.  Filled in by the recognizer.
+    private final String fLang;              // The language, if one was determined by
+                                             //   the recognizer during the detect operation.
+}
diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_2022.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_2022.java
new file mode 100644
index 000000000..838bcd467
--- /dev/null
+++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_2022.java
@@ -0,0 +1,164 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+/*
+ *******************************************************************************
+ * Copyright (C) 2005 - 2012, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package io.legado.app.utils.icu4j;
+
+
+/**
+ * class CharsetRecog_2022 part of the ICU charset detection implementation.
+ * This is a superclass for the individual detectors for
+ * each of the detectable members of the ISO 2022 family
+ * of encodings.
+ *

+ * The separate classes are nested within this class.
+ */
+abstract class CharsetRecog_2022 extends CharsetRecognizer {
+
+
+    /**
+     * Matching function shared among the 2022 detectors JP, CN and KR.
+     * Counts the recognized and unrecognized escape sequences in the sample
+     * of text, and computes a score from their total number and from the
+     * proportion that fit the encoding.
+     *
+     * @param text            the byte buffer containing text to analyse
+     * @param textLen         the number of bytes of text to examine.
+     * @param escapeSequences the byte escape sequences to test for.
+     * @return match quality, in the range of 0-100.
+     */
+    int match(byte[] text, int textLen, byte[][] escapeSequences) {
+        int hits = 0;
+        int misses = 0;
+        int shifts = 0;
+
+        for (int pos = 0; pos < textLen; pos++) {
+            final byte current = text[pos];
+            if (current == 0x1b) {
+                final int seqLen = recognizedEscapeLength(text, textLen, pos, escapeSequences);
+                if (seqLen >= 0) {
+                    hits++;
+                    // Skip the rest of the sequence; the loop increment steps past its last byte.
+                    pos += seqLen - 1;
+                } else {
+                    misses++;
+                }
+            } else if (current == 0x0e || current == 0x0f) {
+                // Shift in/out
+                shifts++;
+            }
+        }
+
+        if (hits == 0) {
+            return 0;
+        }
+
+        //
+        // Initial quality is based on relative proportion of recognized vs.
+        //   unrecognized escape sequences.
+        //   All good:  quality = 100;
+        //   half or less good: quality = 0;
+        //   linear in between.
+        int quality = 100 * (hits - misses) / (hits + misses);
+
+        // Back off quality if there were too few escape sequences seen.
+        //   Include shifts in this computation, so that KR does not get penalized
+        //   for having only a single Escape sequence, but many shifts.
+        final int evidence = hits + shifts;
+        if (evidence < 5) {
+            quality -= (5 - evidence) * 10;
+        }
+
+        return Math.max(quality, 0);
+    }
+
+    /**
+     * Find the first listed escape sequence that occurs in {@code text} at {@code pos}.
+     * The byte at {@code pos} is assumed to be the escape byte already, so the
+     * comparison starts with the second byte of each candidate.
+     *
+     * @return the length of the first matching sequence, or -1 if none matches.
+     */
+    private static int recognizedEscapeLength(byte[] text, int textLen, int pos, byte[][] escapeSequences) {
+        for (byte[] candidate : escapeSequences) {
+            if (textLen - pos < candidate.length) {
+                continue;  // not enough input left for this candidate
+            }
+            boolean identical = true;
+            for (int k = 1; k < candidate.length; k++) {
+                if (candidate[k] != text[pos + k]) {
+                    identical = false;
+                    break;
+                }
+            }
+            if (identical) {
+                return candidate.length;
+            }
+        }
+        return -1;
+    }
+
+
+    /** Recognizer reporting "ISO-2022-JP". */
+    static class CharsetRecog_2022JP extends CharsetRecog_2022 {
+        private byte[][] escapeSequences = {
+                {0x1b, 0x24, 0x28, 0x43},   // KS X 1001:1992
+                {0x1b, 0x24, 0x28, 0x44},   // JIS X 212-1990
+                {0x1b, 0x24, 0x40},         // JIS C 6226-1978
+                {0x1b, 0x24, 0x41},         // GB 2312-80
+                {0x1b, 0x24, 0x42},         // JIS X 208-1983
+                {0x1b, 0x26, 0x40},         // JIS X 208 1990, 1997
+                {0x1b, 0x28, 0x42},         // ASCII
+                {0x1b, 0x28, 0x48},         // JIS-Roman
+                {0x1b, 0x28, 0x49},         // Half-width katakana
+                {0x1b, 0x28, 0x4a},         // JIS-Roman
+                {0x1b, 0x2e, 0x41},         // ISO 8859-1
+                {0x1b, 0x2e, 0x46}          // ISO 8859-7
+        };
+
+        String getName() {
+            return "ISO-2022-JP";
+        }
+
+        CharsetMatch match(CharsetDetector det) {
+            final int score = match(det.fInputBytes, det.fInputLen, escapeSequences);
+            if (score == 0) {
+                return null;
+            }
+            return new CharsetMatch(det, this, score);
+        }
+    }
+
+    /** Recognizer reporting "ISO-2022-KR". */
+    static class CharsetRecog_2022KR extends CharsetRecog_2022 {
+        private byte[][] escapeSequences = {
+                {0x1b, 0x24, 0x29, 0x43}
+        };
+
+        String getName() {
+            return "ISO-2022-KR";
+        }
+
+        CharsetMatch match(CharsetDetector det) {
+            final int score = match(det.fInputBytes, det.fInputLen, escapeSequences);
+            if (score == 0) {
+                return null;
+            }
+            return new CharsetMatch(det, this, score);
+        }
+    }
+
+    /** Recognizer reporting "ISO-2022-CN". */
+    static class CharsetRecog_2022CN extends CharsetRecog_2022 {
+        private byte[][] escapeSequences = {
+                {0x1b, 0x24, 0x29, 0x41},   // GB 2312-80
+                {0x1b, 0x24, 0x29, 0x47},   // CNS 11643-1992 Plane 1
+                {0x1b, 0x24, 0x2A, 0x48},   // CNS 11643-1992 Plane 2
+                {0x1b, 0x24, 0x29, 0x45},   // ISO-IR-165
+                {0x1b, 0x24, 0x2B, 0x49},   // CNS 11643-1992 Plane 3
+                {0x1b, 0x24, 0x2B, 0x4A},   // CNS 11643-1992 Plane 4
+                {0x1b, 0x24, 0x2B, 0x4B},   // CNS 11643-1992 Plane 5
+                {0x1b, 0x24, 0x2B, 0x4C},   // CNS 11643-1992 Plane 6
+                {0x1b, 0x24, 0x2B, 0x4D},   // CNS 11643-1992 Plane 7
+                {0x1b, 0x4e},               // SS2
+                {0x1b, 0x4f},               // SS3
+        };
+
+        String getName() {
+            return "ISO-2022-CN";
+        }
+
+        CharsetMatch match(CharsetDetector det) {
+            final int score = match(det.fInputBytes, det.fInputLen, escapeSequences);
+            if (score == 0) {
+                return null;
+            }
+            return new CharsetMatch(det, this, score);
+        }
+    }
+
+}
+
diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_UTF8.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_UTF8.java
new file mode 100644
index 000000000..da648141d
--- /dev/null
+++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_UTF8.java
@@ -0,0 +1,97 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+/**
+ * ******************************************************************************
+ * Copyright (C) 2005 - 2014, International Business Machines Corporation and *
+ * others. All Rights Reserved.
*
+ * ******************************************************************************
+ */
+package io.legado.app.utils.icu4j;
+
+
+/**
+ * Charset recognizer for UTF-8
+ */
+class CharsetRecog_UTF8 extends CharsetRecognizer {
+
+    String getName() {
+        return "UTF-8";
+    }
+
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+     *
+     * Scans the raw input, counting well-formed and malformed multi-byte
+     * sequences, and derives a confidence from those counts and from the
+     * presence of a UTF-8 byte order mark. Returns null when the confidence is 0.
+     */
+    CharsetMatch match(CharsetDetector det) {
+        boolean hasBOM = false;
+        int numValid = 0;
+        int numInvalid = 0;
+        byte[] input = det.fRawInput;
+        int trailBytes = 0;
+        int confidence;
+
+        if (det.fRawLength >= 3 &&
+                (input[0] & 0xFF) == 0xef && (input[1] & 0xFF) == 0xbb && (input[2] & 0xFF) == 0xbf) {
+            hasBOM = true;
+        }
+
+        // Scan for multi-byte sequences
+        for (int i = 0; i < det.fRawLength; i++) {
+            int b = input[i];
+            if ((b & 0x80) == 0) {
+                continue;   // ASCII
+            }
+
+            // Hi bit on char found.  Figure out how long the sequence should be
+            if ((b & 0x0e0) == 0x0c0) {
+                trailBytes = 1;
+            } else if ((b & 0x0f0) == 0x0e0) {
+                trailBytes = 2;
+            } else if ((b & 0x0f8) == 0xf0) {
+                trailBytes = 3;
+            } else {
+                numInvalid++;
+                continue;
+            }
+
+            // Verify that we've got the right number of trail bytes in the sequence.
+            // A sequence cut off by the end of the input counts as neither valid nor invalid.
+            for (; ; ) {
+                i++;
+                if (i >= det.fRawLength) {
+                    break;
+                }
+                b = input[i];
+                if ((b & 0xc0) != 0x080) {
+                    numInvalid++;
+                    break;
+                }
+                if (--trailBytes == 0) {
+                    numValid++;
+                    break;
+                }
+            }
+        }
+
+        // Cook up some sort of confidence score, based on presence of a BOM
+        //    and the existence of valid and/or invalid multi-byte sequences.
+        confidence = 0;
+        if (hasBOM && numInvalid == 0) {
+            confidence = 100;
+        } else if (hasBOM && numValid > numInvalid * 10) {
+            confidence = 80;
+        } else if (numValid > 3 && numInvalid == 0) {
+            confidence = 100;
+        } else if (numValid > 0 && numInvalid == 0) {
+            confidence = 80;
+        } else if (numValid == 0 && numInvalid == 0) {
+            // Plain ASCII. Confidence must be > 10, it's more likely than UTF-16, which
+            //              accepts ASCII with confidence = 10.
+            // TODO: add plain ASCII as an explicitly detected type.
+            confidence = 15;
+        } else if (numValid > numInvalid * 10) {
+            // Probably corrupt utf-8 data.  Valid sequences aren't likely by chance.
+            confidence = 25;
+        }
+        return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
+    }
+
+}
diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_Unicode.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_Unicode.java
new file mode 100644
index 000000000..52da0a99f
--- /dev/null
+++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_Unicode.java
@@ -0,0 +1,186 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2013, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ */
+
+package io.legado.app.utils.icu4j;
+
+
+/**
+ * This class matches UTF-16 and UTF-32, both big- and little-endian. The
+ * BOM will be used if it is present.
+ */
+abstract class CharsetRecog_Unicode extends CharsetRecognizer {
+
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#getName()
+     */
+    abstract String getName();
+
+    /* (non-Javadoc)
+     * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
+     */
+    abstract CharsetMatch match(CharsetDetector det);
+
+    /** Combine a high and a low byte into one unsigned 16-bit code unit. */
+    static int codeUnit16FromBytes(byte hi, byte lo) {
+        return ((hi & 0xff) << 8) | (lo & 0xff);
+    }
+
+    // UTF-16 confidence calculation. Very simple minded, but better than nothing.
+    //   Any 8 bit non-control characters bump the confidence up. These have a zero high byte,
+    //     and are very likely to be UTF-16, although they could also be part of a UTF-32 code.
+    //   NULs are a contra-indication, they will appear commonly if the actual encoding is UTF-32.
+    //   NULs should be rare in actual text.
+    // The result is clamped to the range 0..100.
+    static int adjustConfidence(int codeUnit, int confidence) {
+        if (codeUnit == 0) {
+            confidence -= 10;
+        } else if ((codeUnit >= 0x20 && codeUnit <= 0xff) || codeUnit == 0x0a) {
+            confidence += 10;
+        }
+        if (confidence < 0) {
+            confidence = 0;
+        } else if (confidence > 100) {
+            confidence = 100;
+        }
+        return confidence;
+    }
+
+    /**
+     * Shared UTF-16 scoring for both byte orders.
+     * Looks at no more than the first 30 bytes of the raw input. The scan is
+     * bounded by det.fRawLength as well as by the array length, so that bytes
+     * of the buffer that lie beyond the data actually supplied are never
+     * scored (the UTF-8 and UTF-32 recognizers use the same bound).
+     *
+     * @param det       the detector holding the raw input.
+     * @param bigEndian true to read code units high byte first.
+     * @return the confidence, 0-100; 0 means "no match".
+     */
+    private static int utf16Confidence(CharsetDetector det, boolean bigEndian) {
+        byte[] input = det.fRawInput;
+        int confidence = 10;
+
+        int bytesToCheck = Math.min(Math.min(input.length, det.fRawLength), 30);
+        for (int charIndex = 0; charIndex < bytesToCheck - 1; charIndex += 2) {
+            int codeUnit = bigEndian
+                    ? codeUnit16FromBytes(input[charIndex], input[charIndex + 1])
+                    : codeUnit16FromBytes(input[charIndex + 1], input[charIndex]);
+            if (charIndex == 0 && codeUnit == 0xFEFF) {
+                // Byte order mark in this byte order: certain match.
+                confidence = 100;
+                break;
+            }
+            confidence = adjustConfidence(codeUnit, confidence);
+            if (confidence == 0 || confidence == 100) {
+                break;
+            }
+        }
+        // Fewer than two code units is too little evidence unless a BOM was seen.
+        if (bytesToCheck < 4 && confidence < 100) {
+            confidence = 0;
+        }
+        return confidence;
+    }
+
+    /** Recognizer reporting "UTF-16BE". */
+    static class CharsetRecog_UTF_16_BE extends CharsetRecog_Unicode {
+        String getName() {
+            return "UTF-16BE";
+        }
+
+        CharsetMatch match(CharsetDetector det) {
+            int confidence = utf16Confidence(det, true);
+            return confidence > 0 ? new CharsetMatch(det, this, confidence) : null;
+        }
+    }
+
+    /** Recognizer reporting "UTF-16LE". */
+    static class CharsetRecog_UTF_16_LE extends CharsetRecog_Unicode {
+        String getName() {
+            return "UTF-16LE";
+        }
+
+        CharsetMatch match(CharsetDetector det) {
+            int confidence = utf16Confidence(det, false);
+            return confidence > 0 ? new CharsetMatch(det, this, confidence) : null;
+        }
+    }
+
+    /** Common UTF-32 scoring; subclasses supply the byte order through getChar(). */
+    static abstract class CharsetRecog_UTF_32 extends CharsetRecog_Unicode {
+        /** Assemble the 32-bit value stored in the four bytes starting at index. */
+        abstract int getChar(byte[] input, int index);
+
+        abstract String getName();
+
+        CharsetMatch match(CharsetDetector det) {
+            byte[] input = det.fRawInput;
+            int limit = (det.fRawLength / 4) * 4;   // whole 4-byte units only
+            int numValid = 0;
+            int numInvalid = 0;
+            boolean hasBOM = false;
+            int confidence = 0;
+
+            if (limit == 0) {
+                return null;
+            }
+            if (getChar(input, 0) == 0x0000FEFF) {
+                hasBOM = true;
+            }
+
+            for (int i = 0; i < limit; i += 4) {
+                int ch = getChar(input, i);
+
+                // Valid values are 0..0x10FFFF inclusive, excluding the surrogate range.
+                if (ch < 0 || ch > 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
+                    numInvalid += 1;
+                } else {
+                    numValid += 1;
+                }
+            }
+
+
+            // Cook up some sort of confidence score, based on presence of a BOM
+            //    and the existence of valid and/or invalid code units.
+            if (hasBOM && numInvalid == 0) {
+                confidence = 100;
+            } else if (hasBOM && numValid > numInvalid * 10) {
+                confidence = 80;
+            } else if (numValid > 3 && numInvalid == 0) {
+                confidence = 100;
+            } else if (numValid > 0 && numInvalid == 0) {
+                confidence = 80;
+            } else if (numValid > numInvalid * 10) {
+                // Probably corrupt UTF-32 data.  Valid sequences aren't likely by chance.
+                confidence = 25;
+            }
+
+            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
+        }
+    }
+
+    /** Recognizer reporting "UTF-32BE"; reads the most significant byte first. */
+    static class CharsetRecog_UTF_32_BE extends CharsetRecog_UTF_32 {
+        int getChar(byte[] input, int index) {
+            return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
+                    (input[index + 2] & 0xFF) << 8 | (input[index + 3] & 0xFF);
+        }
+
+        String getName() {
+            return "UTF-32BE";
+        }
+    }
+
+
+    /** Recognizer reporting "UTF-32LE"; reads the least significant byte first. */
+    static class CharsetRecog_UTF_32_LE extends CharsetRecog_UTF_32 {
+        int getChar(byte[] input, int index) {
+            return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |
+                    (input[index + 1] & 0xFF) << 8 | (input[index + 0] & 0xFF);
+        }
+
+        String getName() {
+            return "UTF-32LE";
+        }
+    }
+}
diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_mbcs.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_mbcs.java
new file mode 100644
index 000000000..2881b8fec
--- /dev/null
+++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_mbcs.java
@@ -0,0 +1,540 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+/*
+ ****************************************************************************
+ * Copyright (C) 2005-2012, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ ****************************************************************************
+ *
+ */
+package io.legado.app.utils.icu4j;
+
+
+import java.util.Arrays;
+
+/**
+ * CharsetRecognizer implementation for Asian - double or multi-byte - charsets.
+ * Match is determined mostly by the input data adhering to the
+ * encoding scheme for the charset, and, optionally,
+ * frequency-of-occurrence of characters.
+ *

+ * Instances of this class are singletons, one per encoding + * being recognized. They are created in the main + * CharsetDetector class and kept in the global list of available + * encodings to be checked. The specific encoding being recognized + * is determined by subclass. + */ +abstract class CharsetRecog_mbcs extends CharsetRecognizer { + + /** + * Get the IANA name of this charset. + * + * @return the charset name. + */ + abstract String getName(); + + + /** + * Test the match of this charset with the input text data + * which is obtained via the CharsetDetector object. + * + * @param det The CharsetDetector, which contains the input text + * to be checked for being in this charset. + * @param commonChars Table of frequently occurring double-byte values for this charset, or null + * when no statistics exist. It is probed with Arrays.binarySearch, so it must be sorted. + * @return The match confidence as a plain int, + *
+ * ranging from 0 (no match) to 100. + *
+ * No match-reason bits are packed into the returned value. + */ + int match(CharsetDetector det, int[] commonChars) { + @SuppressWarnings("unused") + int singleByteCharCount = 0; //TODO Do we really need this? + int doubleByteCharCount = 0; + int commonCharCount = 0; + int badCharCount = 0; + int totalCharCount = 0; + int confidence = 0; + iteratedChar iter = new iteratedChar(); + + // "break detectBlock" leaves the labeled block and falls through to the final return. + detectBlock: + { + for (iter.reset(); nextChar(iter, det); ) { + totalCharCount++; + if (iter.error) { + badCharCount++; + } else { + long cv = iter.charValue & 0xFFFFFFFFL; + + if (cv <= 0xff) { + singleByteCharCount++; + } else { + doubleByteCharCount++; + if (commonChars != null) { + // NOTE: This assumes that there are no 4-byte common chars. + if (Arrays.binarySearch(commonChars, (int) cv) >= 0) { + commonCharCount++; + } + } + } + } + if (badCharCount >= 2 && badCharCount * 5 >= doubleByteCharCount) { + // Bail out early if the byte data is not matching the encoding scheme. + break detectBlock; + } + } + + if (doubleByteCharCount <= 10 && badCharCount == 0) { + // Not many multi-byte chars. + if (doubleByteCharCount == 0 && totalCharCount < 10) { + // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes. + // We don't have enough data to have any confidence. + // Statistical analysis of single byte non-ASCII characters would probably help here. + confidence = 0; + } else { + // ASCII or ISO file? It's probably not our encoding, + // but is not incompatible with our encoding, so don't give it a zero. + confidence = 10; + } + + break detectBlock; + } + + // + // No match if there are too many characters that don't fit the encoding scheme. + // (should we have zero tolerance for these?) + // + if (doubleByteCharCount < 20 * badCharCount) { + confidence = 0; + break detectBlock; + } + + if (commonChars == null) { + // We have no statistics on frequently occurring characters.
+ // Assess confidence purely on having a reasonable number of + // multi-byte characters (the more the better). + confidence = 30 + doubleByteCharCount - 20 * badCharCount; + if (confidence > 100) { + confidence = 100; + } + } else { + // + // Frequency of occurrence statistics exist. + // + double maxVal = Math.log((float) doubleByteCharCount / 4); + double scaleFactor = 90.0 / maxVal; + confidence = (int) (Math.log(commonCharCount + 1) * scaleFactor + 10); + confidence = Math.min(confidence, 100); + } + } // end of detectBlock: + + return confidence; + } + + // "Character" iterated character class. + // Recognizers for specific mbcs encodings make their "characters" available + // by providing a nextChar() function that fills in an instance of iteratedChar + // with the next char from the input. + // The returned characters are not converted to Unicode, but remain as the raw + // bytes (concatenated into an int) from the codepage data. + // + // For Asian charsets, use the raw input rather than the input that has been + // stripped of markup. Detection only considers multi-byte chars, effectively + // stripping markup anyway, and double byte chars do occur in markup too. + // + static class iteratedChar { + int charValue = 0; // 1-4 bytes from the raw input data + int index = 0; // offset of the current char's first byte; nextChar() copies nextIndex here + int nextIndex = 0; // offset of the next unread byte in the raw input + boolean error = false; // set by nextChar() when the bytes do not fit the encoding scheme + boolean done = false; // set once the raw input has been exhausted + + void reset() { + charValue = 0; + index = -1; + nextIndex = 0; + error = false; + done = false; + } + + // Returns the next raw byte as an unsigned value (0-255), or -1 after setting done at end of input. + int nextByte(CharsetDetector det) { + if (nextIndex >= det.fRawLength) { + done = true; + return -1; + } + int byteValue = (int) det.fRawInput[nextIndex++] & 0x00ff; + return byteValue; + } + } + + /** + * Get the next character (however many bytes it is) from the input data. + * Subclasses for specific charset encodings must implement this function + * to get characters according to the rules of their encoding scheme. + *

+ * This function is not a method of class iteratedChar only because + * that would require a lot of extra derived classes, which is awkward. + * + * @param it The iteratedChar "struct" into which the returned char is placed. + * @param det The charset detector, which is needed to get at the input byte data + * being iterated over. + * @return True if a character was returned, false at end of input. + */ + abstract boolean nextChar(iteratedChar it, CharsetDetector det); + + + /** + * Shift-JIS charset recognizer. + */ + static class CharsetRecog_sjis extends CharsetRecog_mbcs { + static int[] commonChars = + // TODO: This set of data comes from the character frequency- + // of-occurence analysis tool. The data needs to be moved + // into a resource and loaded from there. + {0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, + 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, + 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, + 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, + 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, + 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa}; + + boolean nextChar(iteratedChar it, CharsetDetector det) { + it.index = it.nextIndex; + it.error = false; + int firstByte; + firstByte = it.charValue = it.nextByte(det); + if (firstByte < 0) { + return false; + } + + if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf)) { + return true; + } + + int secondByte = it.nextByte(det); + if (secondByte < 0) { + return false; + } + it.charValue = (firstByte << 8) | secondByte; + if (!((secondByte >= 0x40 && secondByte <= 0x7f) || (secondByte >= 0x80 && secondByte <= 0xff))) { + // Illegal second byte value. + it.error = true; + } + return true; + } + + CharsetMatch match(CharsetDetector det) { + int confidence = match(det, commonChars); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + + String getName() { + return "Shift_JIS"; + } + + public String getLanguage() { + return "ja"; + } + + + } + + + /** + * Big5 charset recognizer. + */ + static class CharsetRecog_big5 extends CharsetRecog_mbcs { + static int[] commonChars = + // TODO: This set of data comes from the character frequency- + // of-occurence analysis tool. The data needs to be moved + // into a resource and loaded from there. + {0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, + 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, + 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, + 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, + 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, + 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, + 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, + 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, + 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, + 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f}; + + boolean nextChar(iteratedChar it, CharsetDetector det) { + it.index = it.nextIndex; + it.error = false; + int firstByte; + firstByte = it.charValue = it.nextByte(det); + if (firstByte < 0) { + return false; + } + + if (firstByte <= 0x7f || firstByte == 0xff) { + // single byte character. + return true; + } + + int secondByte = it.nextByte(det); + if (secondByte < 0) { + return false; + } + it.charValue = (it.charValue << 8) | secondByte; + + if (secondByte < 0x40 || + secondByte == 0x7f || + secondByte == 0xff) { + it.error = true; + } + return true; + } + + CharsetMatch match(CharsetDetector det) { + int confidence = match(det, commonChars); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + + String getName() { + return "Big5"; + } + + + public String getLanguage() { + return "zh"; + } + } + + + /** + * EUC charset recognizers. One abstract class that provides the common function + * for getting the next character according to the EUC encoding scheme, + * and nested derived classes for EUC_KR, EUC_JP, EUC_CN. + */ + abstract static class CharsetRecog_euc extends CharsetRecog_mbcs { + + /* + * (non-Javadoc) + * Get the next character value for EUC based encodings. + * Character "value" is simply the raw bytes that make up the character + * packed into an int. + */ + boolean nextChar(iteratedChar it, CharsetDetector det) { + it.index = it.nextIndex; + it.error = false; + int firstByte = 0; + int secondByte = 0; + int thirdByte = 0; + //int fourthByte = 0; + + buildChar: + { + firstByte = it.charValue = it.nextByte(det); + if (firstByte < 0) { + // Ran off the end of the input data + it.done = true; + break buildChar; + } + if (firstByte <= 0x8d) { + // single byte char + break buildChar; + } + + secondByte = it.nextByte(det); + it.charValue = (it.charValue << 8) | secondByte; + + if (firstByte >= 0xA1 && firstByte <= 0xfe) { + // Two byte Char + if (secondByte < 0xa1) { + it.error = true; + } + break buildChar; + } + if (firstByte == 0x8e) { + // Code Set 2. + // In EUC-JP, total char size is 2 bytes, only one byte of actual char value. + // In EUC-TW, total char size is 4 bytes, three bytes contribute to char value. + // We don't know which we've got. + // Treat it like EUC-JP. If the data really was EUC-TW, the following two + // bytes will look like a well formed 2 byte char. + if (secondByte < 0xa1) { + it.error = true; + } + break buildChar; + } + + if (firstByte == 0x8f) { + // Code set 3. + // Three byte total char size, two bytes of actual char value. 
+ thirdByte = it.nextByte(det); + it.charValue = (it.charValue << 8) | thirdByte; + if (thirdByte < 0xa1) { + it.error = true; + } + } + } + + return (it.done == false); + } + + /** + * The charset recognize for EUC-JP. A singleton instance of this class + * is created and kept by the public CharsetDetector class + */ + static class CharsetRecog_euc_jp extends CharsetRecog_euc { + static int[] commonChars = + // TODO: This set of data comes from the character frequency- + // of-occurence analysis tool. The data needs to be moved + // into a resource and loaded from there. + {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, + 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, + 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, + 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, + 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, + 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, + 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, + 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, + 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, + 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1}; + + String getName() { + return "EUC-JP"; + } + + CharsetMatch match(CharsetDetector det) { + int confidence = match(det, commonChars); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + + public String getLanguage() { + return "ja"; + } + } + + /** + * The charset recognize for EUC-KR. 
A singleton instance of this class + * is created and kept by the public CharsetDetector class + */ + static class CharsetRecog_euc_kr extends CharsetRecog_euc { + static int[] commonChars = + // TODO: This set of data comes from the character frequency- + // of-occurence analysis tool. The data needs to be moved + // into a resource and loaded from there. + {0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, + 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, + 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, + 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, + 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, + 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, + 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, + 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, + 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, + 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad}; + + String getName() { + return "EUC-KR"; + } + + CharsetMatch match(CharsetDetector det) { + int confidence = match(det, commonChars); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + + public String getLanguage() { + return "ko"; + } + } + } + + /** + * GB-18030 recognizer. Uses simplified Chinese statistics. + */ + static class CharsetRecog_gb_18030 extends CharsetRecog_mbcs { + + /* + * (non-Javadoc) + * Get the next character value for EUC based encodings. + * Character "value" is simply the raw bytes that make up the character + * packed into an int. 
+ */ + boolean nextChar(iteratedChar it, CharsetDetector det) { + it.index = it.nextIndex; + it.error = false; + int firstByte = 0; + int secondByte = 0; + int thirdByte = 0; + int fourthByte = 0; + + buildChar: + { + firstByte = it.charValue = it.nextByte(det); + + if (firstByte < 0) { + // Ran off the end of the input data + it.done = true; + break buildChar; + } + + if (firstByte <= 0x80) { + // single byte char + break buildChar; + } + + secondByte = it.nextByte(det); + it.charValue = (it.charValue << 8) | secondByte; + + if (firstByte >= 0x81 && firstByte <= 0xFE) { + // Two byte Char + if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >= 80 && secondByte <= 0xFE)) { + break buildChar; + } + + // Four byte char + if (secondByte >= 0x30 && secondByte <= 0x39) { + thirdByte = it.nextByte(det); + + if (thirdByte >= 0x81 && thirdByte <= 0xFE) { + fourthByte = it.nextByte(det); + + if (fourthByte >= 0x30 && fourthByte <= 0x39) { + it.charValue = (it.charValue << 16) | (thirdByte << 8) | fourthByte; + break buildChar; + } + } + } + + it.error = true; + break buildChar; + } + } + + return (it.done == false); + } + + static int[] commonChars = + // TODO: This set of data comes from the character frequency- + // of-occurence analysis tool. The data needs to be moved + // into a resource and loaded from there. 
+ {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, + 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, + 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, + 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, + 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, + 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, + 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, + 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, + 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, + 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0}; + + + String getName() { + return "GB18030"; + } + + CharsetMatch match(CharsetDetector det) { + int confidence = match(det, commonChars); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + + public String getLanguage() { + return "zh"; + } + } + + +} diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_sbcs.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_sbcs.java new file mode 100644 index 000000000..c84eada05 --- /dev/null +++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecog_sbcs.java @@ -0,0 +1,1144 @@ +/* GENERATED SOURCE. DO NOT MODIFY. */ +/* + **************************************************************************** + * Copyright (C) 2005-2013, International Business Machines Corporation and * + * others. All Rights Reserved. * + ************************************************************************** * + * + */ + +package io.legado.app.utils.icu4j; + + +/** + * This class recognizes single-byte encodings. Because the encoding scheme is so + * simple, language statistics are used to do the matching. 
+ */ +abstract class CharsetRecog_sbcs extends CharsetRecognizer { + + /* (non-Javadoc) + * @see com.ibm.icu.text.CharsetRecognizer#getName() + */ + abstract String getName(); + + static class NGramParser { + // private static final int N_GRAM_SIZE = 3; + private static final int N_GRAM_MASK = 0xFFFFFF; + + protected int byteIndex = 0; + private int ngram = 0; + + private int[] ngramList; + protected byte[] byteMap; + + private int ngramCount; + private int hitCount; + + protected byte spaceChar; + + public NGramParser(int[] theNgramList, byte[] theByteMap) { + ngramList = theNgramList; + byteMap = theByteMap; + + ngram = 0; + + ngramCount = hitCount = 0; + } + + /* + * Binary search for value in table, which must have exactly 64 entries. + */ + private static int search(int[] table, int value) { + int index = 0; + + if (table[index + 32] <= value) { + index += 32; + } + + if (table[index + 16] <= value) { + index += 16; + } + + if (table[index + 8] <= value) { + index += 8; + } + + if (table[index + 4] <= value) { + index += 4; + } + + if (table[index + 2] <= value) { + index += 2; + } + + if (table[index + 1] <= value) { + index += 1; + } + + if (table[index] > value) { + index -= 1; + } + + if (index < 0 || table[index] != value) { + return -1; + } + + return index; + } + + private void lookup(int thisNgram) { + ngramCount += 1; + + if (search(ngramList, thisNgram) >= 0) { + hitCount += 1; + } + + } + + protected void addByte(int b) { + ngram = ((ngram << 8) + (b & 0xFF)) & N_GRAM_MASK; + lookup(ngram); + } + + private int nextByte(CharsetDetector det) { + if (byteIndex >= det.fInputLen) { + return -1; + } + + return det.fInputBytes[byteIndex++] & 0xFF; + } + + protected void parseCharacters(CharsetDetector det) { + int b; + boolean ignoreSpace = false; + + while ((b = nextByte(det)) >= 0) { + byte mb = byteMap[b]; + + // TODO: 0x20 might not be a space in all character sets... 
+ if (mb != 0) { + if (!(mb == spaceChar && ignoreSpace)) { + addByte(mb); + } + + ignoreSpace = (mb == spaceChar); + } + } + + } + + public int parse(CharsetDetector det) { + return parse(det, (byte) 0x20); + } + + public int parse(CharsetDetector det, byte spaceCh) { + + this.spaceChar = spaceCh; + + parseCharacters(det); + + // TODO: Is this OK? The buffer could have ended in the middle of a word... + addByte(spaceChar); + + double rawPercent = (double) hitCount / (double) ngramCount; + +// if (rawPercent <= 2.0) { +// return 0; +// } + + // TODO - This is a bit of a hack to take care of a case + // were we were getting a confidence of 135... + if (rawPercent > 0.33) { + return 98; + } + + return (int) (rawPercent * 300.0); + } + } + + static class NGramParser_IBM420 extends NGramParser { + private byte alef = 0x00; + + protected static byte[] unshapeMap = { +/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 
0x4E, (byte) 0x4F, +/* 5- */ (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F, +/* 6- */ (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, +/* 7- */ (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 0x7F, +/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F, +/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E, +/* A- */ (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF, +/* B- */ (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF, +/* C- */ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF, +/* D- */ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF, +/* E- */ (byte) 
0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, +/* F- */ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + }; + + + public NGramParser_IBM420(int[] theNgramList, byte[] theByteMap) { + super(theNgramList, theByteMap); + } + + private byte isLamAlef(byte b) { + if (b == (byte) 0xb2 || b == (byte) 0xb3) { + return (byte) 0x47; + } else if (b == (byte) 0xb4 || b == (byte) 0xb5) { + return (byte) 0x49; + } else if (b == (byte) 0xb8 || b == (byte) 0xb9) { + return (byte) 0x56; + } else + return (byte) 0x00; + } + + /* + * Arabic shaping needs to be done manually. Cannot call ArabicShaping class + * because CharsetDetector is dealing with bytes not Unicode code points. We could + * convert the bytes to Unicode code points but that would leave us dependent + * on CharsetICU which we try to avoid. IBM420 converter amongst different versions + * of JDK can produce different results and therefore is also avoided. + */ + private int nextByte(CharsetDetector det) { + if (byteIndex >= det.fInputLen || det.fInputBytes[byteIndex] == 0) { + return -1; + } + int next; + + alef = isLamAlef(det.fInputBytes[byteIndex]); + if (alef != (byte) 0x00) + next = 0xB1 & 0xFF; + else + next = unshapeMap[det.fInputBytes[byteIndex] & 0xFF] & 0xFF; + + byteIndex++; + + return next; + } + + protected void parseCharacters(CharsetDetector det) { + int b; + boolean ignoreSpace = false; + + while ((b = nextByte(det)) >= 0) { + byte mb = byteMap[b]; + + // TODO: 0x20 might not be a space in all character sets... 
+ if (mb != 0) { + if (!(mb == spaceChar && ignoreSpace)) { + addByte(mb); + } + + ignoreSpace = (mb == spaceChar); + } + if (alef != (byte) 0x00) { + mb = byteMap[alef & 0xFF]; + + // TODO: 0x20 might not be a space in all character sets... + if (mb != 0) { + if (!(mb == spaceChar && ignoreSpace)) { + addByte(mb); + } + + ignoreSpace = (mb == spaceChar); + } + + } + } + } + } + + + int match(CharsetDetector det, int[] ngrams, byte[] byteMap) { + return match(det, ngrams, byteMap, (byte) 0x20); + } + + int match(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar) { + NGramParser parser = new NGramParser(ngrams, byteMap); + return parser.parse(det, spaceChar); + } + + int matchIBM420(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar) { + NGramParser_IBM420 parser = new NGramParser_IBM420(ngrams, byteMap); + return parser.parse(det, spaceChar); + } + + static class NGramsPlusLang { + int[] fNGrams; + String fLang; + + NGramsPlusLang(String la, int[] ng) { + fLang = la; + fNGrams = ng; + } + } + + static class CharsetRecog_8859_1 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 
0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 
0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + }; + + + private static NGramsPlusLang[] ngrams_8859_1 = new NGramsPlusLang[]{ + new NGramsPlusLang( + "da", + new int[]{ + 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, + 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, + 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, + 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, + }), + new NGramsPlusLang( + "de", + new int[]{ + 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, + 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, + 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, + 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 
0x766572, + }), + new NGramsPlusLang( + "en", + new int[]{ + 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, + 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, + 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, + 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, + }), + + new NGramsPlusLang( + "es", + new int[]{ + 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, + 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, + 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, + }), + + new NGramsPlusLang( + "fr", + new int[]{ + 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, + 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, + 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, + 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 
0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, + }), + + new NGramsPlusLang( + "it", + new int[]{ + 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, + 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, + 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, + 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, + }), + + new NGramsPlusLang( + "nl", + new int[]{ + 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, + 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, + 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, + 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, + }), + + new NGramsPlusLang( + "no", + new int[]{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, + 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 
0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, + 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, + }), + + new NGramsPlusLang( + "pt", + new int[]{ + 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, + 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, + 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, + + }), + + new NGramsPlusLang( + "sv", + new int[]{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, + 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, + 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, + }), + + }; + + + public CharsetMatch match(CharsetDetector det) { + String name = det.fC1Bytes ? 
"windows-1252" : "ISO-8859-1"; + int bestConfidenceSoFar = -1; + String lang = null; + for (NGramsPlusLang ngl : ngrams_8859_1) { + int confidence = match(det, ngl.fNGrams, byteMap); + if (confidence > bestConfidenceSoFar) { + bestConfidenceSoFar = confidence; + lang = ngl.fLang; + } + } + return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang); + } + + + public String getName() { + return "ISO-8859-1"; + } + } + + + static class CharsetRecog_8859_2 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, 
(byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0x20, + (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, + (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, + (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, + }; + + private static 
NGramsPlusLang[] ngrams_8859_2 = new NGramsPlusLang[]{ + new NGramsPlusLang( + "cs", + new int[]{ + 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, + 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, + 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, + 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, + }), + new NGramsPlusLang( + "hu", + new int[]{ + 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, + 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, + 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, + 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, + }), + new NGramsPlusLang( + "pl", + new int[]{ + 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, + 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, + 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, + 0x706F6C, 0x707261, 
0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, + }), + new NGramsPlusLang( + "ro", + new int[]{ + 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, + 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, + 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, + 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, + }) + }; + + public CharsetMatch match(CharsetDetector det) { + String name = det.fC1Bytes ? "windows-1250" : "ISO-8859-2"; + int bestConfidenceSoFar = -1; + String lang = null; + for (NGramsPlusLang ngl : ngrams_8859_2) { + int confidence = match(det, ngl.fNGrams, byteMap); + if (confidence > bestConfidenceSoFar) { + bestConfidenceSoFar = confidence; + lang = ngl.fLang; + } + } + return bestConfidenceSoFar <= 0 ? 
null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang); + } + + public String getName() { + return "ISO-8859-2"; + } + + } + + + abstract static class CharsetRecog_8859_5 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, + }; + + public String getName() { + return "ISO-8859-5"; + } + } + + static class CharsetRecog_8859_5_ru extends CharsetRecog_8859_5 { + private static int[] ngrams = { + 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, + 0x20E7E2, 0x20EDE2, 
0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, + 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, + 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, + }; + + public String getLanguage() { + return "ru"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + } + + abstract static class CharsetRecog_8859_6 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, 
(byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + }; + + public String getName() { + return "ISO-8859-6"; + } + } + + static class CharsetRecog_8859_6_ar extends CharsetRecog_8859_6 { + private static int[] ngrams = { + 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, + 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, + 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, + 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, + }; + + public String getLanguage() { + return "ar"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + } + + abstract static class CharsetRecog_8859_7 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20, + (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE, + (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, + }; + + public String getName() { + return "ISO-8859-7"; + } + } + + static class CharsetRecog_8859_7_el extends CharsetRecog_8859_7 { + private static int[] ngrams = { + 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, + 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 
0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, + 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, + 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, + }; + + public String getLanguage() { + return "el"; + } + + public CharsetMatch match(CharsetDetector det) { + String name = det.fC1Bytes ? "windows-1253" : "ISO-8859-7"; + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "el"); + } + } + + abstract static class CharsetRecog_8859_8 extends CharsetRecog_sbcs { + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 
0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + 
(byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + }; + + public String getName() { + return "ISO-8859-8"; + } + } + + static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8 { + private static int[] ngrams = { + 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, + 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, + 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, + 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, + }; + + public String getName() { + return "ISO-8859-8-I"; + } + + public String getLanguage() { + return "he"; + } + + public CharsetMatch match(CharsetDetector det) { + String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8-I"; + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence, name, "he"); + } + } + + static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8 { + private static int[] ngrams = { + 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, + 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, + 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, + 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, + }; + + public String getLanguage() { + return "he"; + } + + public CharsetMatch match(CharsetDetector det) { + String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8"; + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence, name, "he"); + + } + } + + abstract static class CharsetRecog_8859_9 extends CharsetRecog_sbcs { /* Shared base for the ISO-8859-9 recognizers. byteMap is the 256-entry byte translation table passed to match(): ASCII letters of either case map to the lower-case letter, while digits, punctuation and control bytes map to 0x20 (0x27 maps to 0x00). */ + protected static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + }; + + public String getName() { + return "ISO-8859-9"; + } + } + + static class CharsetRecog_8859_9_tr extends CharsetRecog_8859_9 { /* Turkish ("tr") recognizer; reuses the byteMap inherited from CharsetRecog_8859_9. */ + private static int[] ngrams = { + 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, + 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 
0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, + 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, + 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, + }; + + public String getLanguage() { + return "tr"; + } + + public CharsetMatch match(CharsetDetector det) { /* The reported name is "windows-1254" when det.fC1Bytes is set, otherwise "ISO-8859-9"; a confidence of 0 is reported as null (no match). */ + String name = det.fC1Bytes ? "windows-1254" : "ISO-8859-9"; + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "tr"); + } + } + + static class CharsetRecog_windows_1251 extends CharsetRecog_sbcs { /* Recognizer reporting charset name "windows-1251" and language "ru"; match() returns null when the computed confidence is 0. */ + private static int[] ngrams = { + 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, + 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, + 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, + 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, + }; + + private static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, + (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, + (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20, + (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF, + (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 
0x20, (byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + }; + + public String getName() { + return "windows-1251"; + } + + public String getLanguage() { + return "ru"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + } + + static class CharsetRecog_windows_1256 extends CharsetRecog_sbcs { /* Recognizer reporting charset name "windows-1256" and language "ar"; match() returns null when the computed confidence is 0. */ + private static int[] ngrams = { + 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, + 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, + 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, + 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, + }; + + private static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 
0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, + (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, 
(byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF, + }; + + public String getName() { + return "windows-1256"; + } + + public String getLanguage() { + return "ar"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + } + + static class CharsetRecog_KOI8_R extends CharsetRecog_sbcs { /* Recognizer reporting charset name "KOI8-R" and language "ru"; match() returns null when the computed confidence is 0. */ + private static int[] ngrams = { + 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, + 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, + 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, + 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, + }; + + private static byte[] byteMap = { + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + 
(byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + }; + + public String getName() { + return "KOI8-R"; + } + + public String getLanguage() { + return "ru"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + } + + abstract static class CharsetRecog_IBM424_he extends CharsetRecog_sbcs { /* Shared base for the two IBM424 recognizers (language "he"); the subclasses supply the name and ngram table and reuse this byteMap. */ + protected static byte[] byteMap = { +/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, 
(byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 7- */ (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 0x40, +/* 8- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 9- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* B- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 
0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, + }; + + public String getLanguage() { + return "he"; + } + } + + static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he { /* Reports the name "IBM424_rtl". NOTE(review): (byte) 0x40 is the extra argument of the four-argument match() overload, presumably the space byte for this code page; confirm against CharsetRecog_sbcs.match. */ + public String getName() { + return "IBM424_rtl"; + } + + private static int[] ngrams = { + 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, + 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, + 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, + 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, + }; + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap, (byte) 0x40); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + } + + static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he { /* Reports the name "IBM424_ltr"; uses its own ngram table with the byteMap inherited from CharsetRecog_IBM424_he. */ + public String getName() { + return "IBM424_ltr"; + } + + private static int[] ngrams = { + 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, + 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, + 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, + 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651 + + }; + + public CharsetMatch match(CharsetDetector det) { + int confidence = match(det, ngrams, byteMap, (byte) 0x40); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + } + + abstract static class CharsetRecog_IBM420_ar extends CharsetRecog_sbcs { /* Shared base for the two IBM420 recognizers (language "ar"); the subclasses supply the name and ngram table and reuse this byteMap. */ + + protected static byte[] byteMap = { +/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 7- */ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 8- */ (byte) 0x80, 
(byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, +/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, +/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, +/* B- */ (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, +/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF, +/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, +/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, +/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40, + }; + + + public String getLanguage() { + return "ar"; + } + + } + + static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar { /* Reports the name "IBM420_rtl"; scoring is delegated to matchIBM420(), which is defined outside this chunk. */ + private static int[] ngrams = { + 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 
0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, + 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, + 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, + 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, + }; + + public String getName() { + return "IBM420_rtl"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = matchIBM420(det, ngrams, byteMap, (byte) 0x40); + return confidence == 0 ? null : new CharsetMatch(det, this, confidence); + } + + } + + static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar { /* Reports the name "IBM420_ltr"; scoring is delegated to matchIBM420(), which is defined outside this chunk. */ + private static int[] ngrams = { + 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, + 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, + 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, + 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156 + }; + + public String getName() { + return "IBM420_ltr"; + } + + public CharsetMatch match(CharsetDetector det) { + int confidence = matchIBM420(det, ngrams, byteMap, (byte) 0x40); + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); + } + + } +} diff --git a/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecognizer.java b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecognizer.java new file mode 100644 index 000000000..07a01aa68 --- /dev/null +++ b/app/src/main/java/io/legado/app/utils/icu4j/CharsetRecognizer.java @@ -0,0 +1,53 @@ +/* GENERATED SOURCE. DO NOT MODIFY. */ +/** + * ****************************************************************************** + * Copyright (C) 2005-2012, International Business Machines Corporation and * + * others. All Rights Reserved. * + * ****************************************************************************** + */ +package io.legado.app.utils.icu4j; + + +/** + * Abstract class for recognizing a single charset. + * Part of the implementation of ICU's CharsetDetector. + *

+ * Each specific charset that can be recognized will have an instance + * of some subclass of this class. All interaction between the overall + * CharsetDetector and the stuff specific to an individual charset happens + * via the interface provided here. + *

+ * Instances of CharsetRecognizer DO NOT have or maintain + * state pertaining to a specific match or detect operation. + * They WILL be shared by multiple instances of CharsetDetector. + * They encapsulate const charset-specific information. + */ +abstract class CharsetRecognizer { + /** + * Get the IANA name of this charset. + * + * @return the charset name. + */ + abstract String getName(); + + /** + * Get the ISO language code for this charset. + * + * @return the language code, or null if the language cannot be determined; this base implementation always returns null. + */ + public String getLanguage() { + return null; + } + + /** + * Test the match of this charset with the input text data + * which is obtained via the CharsetDetector object. + * + * @param det The CharsetDetector, which contains the input text + * to be checked for being in this charset. + * @return A CharsetMatch object containing details of match + * with this charset, or null if there was no match. + */ + abstract CharsetMatch match(CharsetDetector det); + +}