pull/433/head
gedoor 4 years ago
parent 56ba339f23
commit 4cd36aa6e6
  1. 42
      app/src/main/java/io/legado/app/utils/EncodingDetect.java

@ -35,6 +35,7 @@ import static android.text.TextUtils.isEmpty;
* @version 1.0 * @version 1.0
* @since Create on 2010-01-27 11:19:00 * @since Create on 2010-01-27 11:19:00
*/ */
@SuppressWarnings("ALL")
public class EncodingDetect { public class EncodingDetect {
public static String getHtmlEncode(@NonNull byte[] bytes) { public static String getHtmlEncode(@NonNull byte[] bytes) {
@ -117,6 +118,7 @@ public class EncodingDetect {
} }
@SuppressWarnings("ALL")
class BytesEncodingDetect extends Encoding { class BytesEncodingDetect extends Encoding {
// Frequency tables to hold the GB, Big5, and EUC-TW character // Frequency tables to hold the GB, Big5, and EUC-TW character
// frequencies // frequencies
@ -271,9 +273,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7
&& (byte) 0xA1 <= rawtext[i + 1] && (byte) 0xA1 <= rawtext[i + 1]
@ -312,9 +312,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7
&& // Original GB range && // Original GB range
@ -373,9 +371,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7
&& // Original GB range && // Original GB range
@ -516,9 +512,7 @@ class BytesEncodingDetect extends Encoding {
// Check to see if characters fit into acceptable ranges // Check to see if characters fit into acceptable ranges
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] if ((byte) 0xA1 <= rawtext[i]
&& rawtext[i] <= (byte) 0xF9 && rawtext[i] <= (byte) 0xF9
@ -560,9 +554,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 128) { if (rawtext[i] < 128) {
// asciichars++;
} else {
dbchars++; dbchars++;
if (0xA1 <= rawtext[i] if (0xA1 <= rawtext[i]
&& rawtext[i] <= 0xF9 && rawtext[i] <= 0xF9
@ -626,9 +618,7 @@ class BytesEncodingDetect extends Encoding {
// and have expected frequency of use // and have expected frequency of use
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
if (rawtext[i] >= 0) { // in ASCII range if (rawtext[i] < 0) { // high bit set
// asciichars++;
} else { // high bit set
dbchars++; dbchars++;
if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i] if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i]
&& (byte) 0xA1 <= rawtext[i + 1] && (byte) 0xA1 <= rawtext[i + 1]
@ -846,9 +836,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE
&& (byte) 0xA1 <= rawtext[i + 1] && (byte) 0xA1 <= rawtext[i + 1]
@ -886,9 +874,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0x81 <= rawtext[i] if ((byte) 0x81 <= rawtext[i]
&& rawtext[i] <= (byte) 0xFE && rawtext[i] <= (byte) 0xFE
@ -945,9 +931,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE
&& (byte) 0xA1 <= rawtext[i + 1] && (byte) 0xA1 <= rawtext[i + 1]
@ -997,9 +981,7 @@ class BytesEncodingDetect extends Encoding {
rawtextlen = rawtext.length; rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) { for (i = 0; i < rawtextlen - 1; i++) {
// System.err.println(rawtext[i]); // System.err.println(rawtext[i]);
if (rawtext[i] >= 0) { if (rawtext[i] < 0) {
// asciichars++;
} else {
dbchars++; dbchars++;
if (i + 1 < rawtext.length if (i + 1 < rawtext.length
&& (((byte) 0x81 <= rawtext[i] && rawtext[i] <= (byte) 0x9F) || ((byte) 0xE0 <= rawtext[i] && rawtext[i] <= (byte) 0xEF)) && (((byte) 0x81 <= rawtext[i] && rawtext[i] <= (byte) 0x9F) || ((byte) 0xE0 <= rawtext[i] && rawtext[i] <= (byte) 0xEF))

Loading…
Cancel
Save