|
|
@ -35,6 +35,7 @@ import static android.text.TextUtils.isEmpty; |
|
|
|
* @version 1.0 |
|
|
|
* @version 1.0 |
|
|
|
* @since Create on 2010-01-27 11:19:00 |
|
|
|
* @since Create on 2010-01-27 11:19:00 |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
@SuppressWarnings("ALL") |
|
|
|
public class EncodingDetect { |
|
|
|
public class EncodingDetect { |
|
|
|
|
|
|
|
|
|
|
|
public static String getHtmlEncode(@NonNull byte[] bytes) { |
|
|
|
public static String getHtmlEncode(@NonNull byte[] bytes) { |
|
|
@ -117,6 +118,7 @@ public class EncodingDetect { |
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@SuppressWarnings("ALL") |
|
|
|
class BytesEncodingDetect extends Encoding { |
|
|
|
class BytesEncodingDetect extends Encoding { |
|
|
|
// Frequency tables to hold the GB, Big5, and EUC-TW character
|
|
|
|
// Frequency tables to hold the GB, Big5, and EUC-TW character
|
|
|
|
// frequencies
|
|
|
|
// frequencies
|
|
|
@ -271,9 +273,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
@ -312,9 +312,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
&& // Original GB range
|
|
|
|
&& // Original GB range
|
|
|
@ -373,9 +371,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 |
|
|
|
&& // Original GB range
|
|
|
|
&& // Original GB range
|
|
|
@ -516,9 +512,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
// Check to see if characters fit into acceptable ranges
|
|
|
|
// Check to see if characters fit into acceptable ranges
|
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] |
|
|
|
if ((byte) 0xA1 <= rawtext[i] |
|
|
|
&& rawtext[i] <= (byte) 0xF9 |
|
|
|
&& rawtext[i] <= (byte) 0xF9 |
|
|
@ -560,9 +554,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 128) { |
|
|
|
if (rawtext[i] < 128) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if (0xA1 <= rawtext[i] |
|
|
|
if (0xA1 <= rawtext[i] |
|
|
|
&& rawtext[i] <= 0xF9 |
|
|
|
&& rawtext[i] <= 0xF9 |
|
|
@ -626,9 +618,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
// and have expected frequency of use
|
|
|
|
// and have expected frequency of use
|
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
if (rawtext[i] >= 0) { // in ASCII range
|
|
|
|
if (rawtext[i] < 0) { // high bit set
|
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { // high bit set
|
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i] |
|
|
|
if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i] |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
@ -846,9 +836,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
@ -886,9 +874,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0x81 <= rawtext[i] |
|
|
|
if ((byte) 0x81 <= rawtext[i] |
|
|
|
&& rawtext[i] <= (byte) 0xFE |
|
|
|
&& rawtext[i] <= (byte) 0xFE |
|
|
@ -945,9 +931,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE |
|
|
|
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
|
&& (byte) 0xA1 <= rawtext[i + 1] |
|
|
@ -997,9 +981,7 @@ class BytesEncodingDetect extends Encoding { |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
rawtextlen = rawtext.length; |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
for (i = 0; i < rawtextlen - 1; i++) { |
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
// System.err.println(rawtext[i]);
|
|
|
|
if (rawtext[i] >= 0) { |
|
|
|
if (rawtext[i] < 0) { |
|
|
|
// asciichars++;
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
dbchars++; |
|
|
|
dbchars++; |
|
|
|
if (i + 1 < rawtext.length |
|
|
|
if (i + 1 < rawtext.length |
|
|
|
&& (((byte) 0x81 <= rawtext[i] && rawtext[i] <= (byte) 0x9F) || ((byte) 0xE0 <= rawtext[i] && rawtext[i] <= (byte) 0xEF)) |
|
|
|
&& (((byte) 0x81 <= rawtext[i] && rawtext[i] <= (byte) 0x9F) || ((byte) 0xE0 <= rawtext[i] && rawtext[i] <= (byte) 0xEF)) |
|
|
|