/*
 * Decompiled with CFR 0.152.
 */
package gate.creole.tokeniser.chinesetokeniser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Dictionary-driven segmenter for Chinese text.
 *
 * <p>The segmenter walks a line character by character and grows the current
 * word for as long as the word dictionary (or the number / foreign-name
 * character sets) allows it; otherwise a single space is inserted and a new
 * word is started. The offsets of the inserted spaces are recorded and can be
 * retrieved with {@link #getMarks()}.
 *
 * <p>Dictionary entries carry one of two states: {@code "1"} marks a complete
 * word, {@code "2"} marks a string that is only known as the beginning of a
 * longer word.
 *
 * <p>Instances keep per-call state (the marks list) and are not synchronized.
 */
public class Segmenter {
    /** Character form selector: traditional character resources only. */
    public static final int TRAD = 0;
    /** Character form selector: simplified character resources only. */
    public static final int SIMP = 1;
    /** Character form selector: simplified and traditional resources. */
    public static final int BOTH = 2;

    /** Common location prefix of every resource file read by this class. */
    private static final String RESOURCE_BASE = "gate:/creole/tokeniser/chinesetokeniser/";
    /** Dictionary state of a complete word. */
    private static final String WORD = "1";
    /** Dictionary state of a string that is only a prefix of a longer word. */
    private static final String PREFIX_ONLY = "2";
    /** The separator that is actually inserted between segments. */
    private static final String SEPARATOR = " ";

    /** Leading characters that {@link #stemWord(String)} may strip. */
    private static final String[] STEM_PREFIXES = {"\u7b2c", "\u526f", "\u4e0d"};
    /** Trailing characters that {@link #stemWord(String)} may strip. */
    private static final String[] STEM_SUFFIXES = {
        "\u4e86", "\u7684", "\u5730", "\u4e0b", "\u4e0a", "\u4e2d", "\u91cc", "\u5230", "\u5185", "\u5916", "\u4eec"
    };
    /** Middle characters that {@link #stemWord(String)} may strip from three-character words. */
    private static final String[] STEM_INFIXES = {"\u5f97", "\u4e0d"};

    private final TreeMap<String, String> zhwords = new TreeMap<>();
    private final TreeSet<String> csurname = new TreeSet<>();
    private final TreeSet<String> cforeign = new TreeSet<>();
    private final TreeSet<String> cnumbers = new TreeSet<>();
    private final TreeSet<String> cnotname = new TreeSet<>();
    /** Separator offsets of the most recent {@link #segmentLine} call; {@code null} before the first call. */
    private ArrayList<Long> marks;

    /**
     * Creates a segmenter and loads its resources.
     *
     * <p>Loading is best-effort: a resource that cannot be read is reported on
     * {@code System.err} and leaves the corresponding collection empty (or
     * partially filled).
     *
     * @param charform     {@link #SIMP}, {@link #TRAD} or {@link #BOTH}; any
     *                     other value is treated like {@link #BOTH}
     * @param loadwordfile whether the word lexicon for {@code charform} is
     *                     loaded; when {@code false} the dictionary starts
     *                     empty and can be filled with {@link #addword(String)}
     */
    public Segmenter(int charform, boolean loadwordfile) {
        // Everything except TRAD needs the simplified sets, everything except
        // SIMP needs the traditional ones.
        if (charform != TRAD) {
            this.loadCharacterSets("s");
        }
        if (charform != SIMP) {
            this.loadCharacterSets("t");
        }
        if (!loadwordfile) {
            return;
        }
        String lexicon;
        if (charform == SIMP) {
            lexicon = "simplexu8.txt";
        } else if (charform == TRAD) {
            lexicon = "tradlexu8.txt";
        } else {
            // Mirrors the character-set selection above: unknown values fall
            // back to the combined lexicon instead of failing on a null stream.
            lexicon = "bothlexu8.txt";
        }
        this.loadLexicon(RESOURCE_BASE + lexicon);
    }

    /** Loads the four character sets whose file names start with {@code variant} ("s" or "t"). */
    private void loadCharacterSets(String variant) {
        this.loadset(this.cnumbers, RESOURCE_BASE + variant + "numbers_u8.txt");
        this.loadset(this.cforeign, RESOURCE_BASE + variant + "foreign_u8.txt");
        this.loadset(this.csurname, RESOURCE_BASE + variant + "surname_u8.txt");
        this.loadset(this.cnotname, RESOURCE_BASE + variant + "notname_u8.txt");
    }

    /** Opens {@code location} as a URL and wraps it in a UTF-8 reader. */
    private static BufferedReader open(String location) throws IOException {
        InputStream in = new URL(location).openStream();
        return new BufferedReader(new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Reads the word lexicon at {@code location}, one entry per line. Lines
     * that are empty, contain {@code '#'} or are five or more characters long
     * are skipped.
     */
    private void loadLexicon(String location) {
        try (BufferedReader reader = open(location)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0 || line.length() >= 5 || line.indexOf('#') != -1) {
                    continue;
                }
                this.registerWord(line);
            }
        } catch (IOException e) {
            // Best effort, as before, but no longer silent.
            System.err.println("Segmenter: unable to load lexicon " + location + ": " + e);
        }
    }

    /**
     * Adds every line of the resource at {@code location} to {@code target},
     * skipping empty lines and lines that contain {@code '#'}.
     */
    private void loadset(TreeSet<String> target, String location) {
        try (BufferedReader reader = open(location)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0 || line.indexOf('#') > -1) {
                    continue;
                }
                target.add(line);
            }
        } catch (Exception e) {
            // Best effort, as before, but no longer silent.
            System.err.println("Segmenter: unable to load character set " + location + ": " + e);
        }
    }

    /**
     * Stores {@code word} as a complete word and every proper prefix of at
     * least two characters as prefix-only, unless that prefix is already in
     * the dictionary (so a known word is never downgraded to a prefix).
     */
    private void registerWord(String word) {
        this.zhwords.put(word, WORD);
        for (int end = 2; end < word.length(); ++end) {
            String prefix = word.substring(0, end);
            if (!this.zhwords.containsKey(prefix)) {
                this.zhwords.put(prefix, PREFIX_ONLY);
            }
        }
    }

    /** Returns {@code true} when every character of {@code text} is a member of {@code set}. */
    private static boolean consistsOf(TreeSet<String> set, String text) {
        for (int i = 0; i < text.length(); ++i) {
            if (!set.contains(String.valueOf(text.charAt(i)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether {@code string} consists only of characters from the
     * number character set.
     *
     * @return {@code true} if every character is a number character; also
     *         {@code true} for the empty string
     */
    public boolean isNumber(String string) {
        return consistsOf(this.cnumbers, string);
    }

    /**
     * Tests whether {@code string} consists only of characters from the
     * foreign-name character set.
     *
     * @return {@code true} if every character is a foreign-name character;
     *         also {@code true} for the empty string
     */
    public boolean isAllForeign(String string) {
        return consistsOf(this.cforeign, string);
    }

    /**
     * Tests whether {@code string} is free of characters from the
     * {@code CJK_UNIFIED_IDEOGRAPHS} Unicode block.
     *
     * @return {@code false} as soon as one such character is found,
     *         {@code true} otherwise (including for the empty string)
     */
    public boolean isNotCJK(String string) {
        for (int i = 0; i < string.length(); ++i) {
            if (Character.UnicodeBlock.of(string.charAt(i)) == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
                return false;
            }
        }
        return true;
    }

    /**
     * Strips at most one affix character from {@code string}.
     *
     * <p>The rules are tried in this order and the first that applies wins:
     * <ol>
     *   <li>a known leading character is removed when the remainder is in the
     *       dictionary or the input is exactly two characters long;</li>
     *   <li>a known trailing character is removed under the same condition;</li>
     *   <li>a known middle character of a three-character input is removed
     *       when the two outer characters together are in the dictionary.</li>
     * </ol>
     *
     * @param string the word to stem; must not be {@code null}
     * @return the stemmed word, or the input unchanged when no rule applies
     *         (an empty input is returned as is)
     */
    public String stemWord(String string) {
        final int length = string.length();
        if (length == 0) {
            // Nothing to strip; the substring calls below need one character.
            return string;
        }

        final String first = string.substring(0, 1);
        final String withoutFirst = string.substring(1);
        for (String prefix : STEM_PREFIXES) {
            if (first.equals(prefix) && (length == 2 || this.zhwords.get(withoutFirst) != null)) {
                return withoutFirst;
            }
        }

        final String last = string.substring(length - 1);
        final String withoutLast = string.substring(0, length - 1);
        for (String suffix : STEM_SUFFIXES) {
            if (last.equals(suffix) && (length == 2 || this.zhwords.get(withoutLast) != null)) {
                return withoutLast;
            }
        }

        if (length == 3) {
            final String middle = string.substring(1, 2);
            final String outer = first + last;
            for (String infix : STEM_INFIXES) {
                if (middle.equals(infix) && this.zhwords.get(outer) != null) {
                    return outer;
                }
            }
        }
        return string;
    }

    /**
     * Segments one line of text by inserting single spaces at word boundaries.
     *
     * <p>Only characters in the {@code CJK_UNIFIED_IDEOGRAPHS} block and
     * characters of the number set take part in word building; all other
     * characters are copied through unchanged. No space is inserted where the
     * input already has whitespace at the boundary.
     *
     * <p>As a side effect the marks list returned by {@link #getMarks()} is
     * replaced by the separator offsets of this call.
     *
     * @param string  the line to segment; must not be {@code null}
     * @param string2 the requested separator. NOTE: this argument is ignored;
     *                a single space is always used. The behaviour is kept
     *                because existing callers may depend on it.
     * @return the segmented line
     */
    public String segmentLine(String string, String string2) {
        final int length = string.length();
        final StringBuilder word = new StringBuilder();
        final StringBuilder out = new StringBuilder();
        this.marks = new ArrayList<>();

        for (int i = 0; i < length; ++i) {
            final char c = string.charAt(i);
            final String cs = String.valueOf(c);
            final boolean segmentable =
                    Character.UnicodeBlock.of(c) == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                    || this.isNumber(cs);

            if (!segmentable) {
                // Flush the pending word, then copy the character through.
                if (word.length() > 0) {
                    out.append(word);
                    if (!Character.isWhitespace(c)) {
                        this.insertSeparator(out, i);
                    }
                    word.setLength(0);
                }
                out.append(c);
                continue;
            }

            if (word.length() == 0) {
                // Start of a new word directly after non-whitespace text.
                if (i > 0 && !Character.isWhitespace(string.charAt(i - 1))) {
                    this.insertSeparator(out, i);
                }
                word.append(c);
                continue;
            }

            final String current = word.toString();
            final String candidate = current + c;
            final String state = this.zhwords.get(candidate);
            final boolean extend =
                    // word + c is a complete dictionary word
                    WORD.equals(state)
                    // run of foreign-name characters, provided more than two
                    // characters remain from c on and the two characters
                    // starting at c are not a dictionary entry
                    || (this.isAllForeign(current)
                            && this.cforeign.contains(cs)
                            && i + 2 < length
                            && !this.zhwords.containsKey(string.substring(i, i + 2)))
                    // run of number characters
                    || (this.isNumber(current) && this.cnumbers.contains(cs))
                    // word + c is only a prefix, but one more character of
                    // look-ahead yields another dictionary entry
                    || (PREFIX_ONLY.equals(state)
                            && i + 1 < length
                            && this.zhwords.containsKey(candidate + string.charAt(i + 1)));
            if (extend) {
                word.append(c);
                continue;
            }

            // c cannot extend the current word: emit it and start a new one.
            out.append(word);
            if (!Character.isWhitespace(c)) {
                this.insertSeparator(out, i);
            }
            word.setLength(0);
            word.append(c);
        }
        out.append(word);
        return out.toString();
    }

    /**
     * Appends the separator to {@code out} and records its mark: the input
     * index plus the number of separators inserted so far.
     */
    private void insertSeparator(StringBuilder out, int inputIndex) {
        this.marks.add(Long.valueOf(inputIndex + this.marks.size()));
        out.append(SEPARATOR);
    }

    /**
     * Adds a word to the dictionary together with all of its proper prefixes
     * of two or more characters (as prefix-only entries, unless already
     * present), so that {@link #segmentLine} can grow towards the word one
     * character at a time. Words of any length are supported.
     *
     * @param string the word to add; must not be {@code null}. An empty
     *               string is ignored.
     */
    public void addword(String string) {
        if (string.length() == 0) {
            return;
        }
        this.registerWord(string);
    }

    /**
     * Returns the separator marks of the most recent {@link #segmentLine}
     * call. Each mark is a {@code Long} holding the input index at which a
     * separator was inserted plus the number of separators inserted before it.
     *
     * @return the live internal list, or {@code null} if {@link #segmentLine}
     *         has not been called yet
     */
    public ArrayList getMarks() {
        return this.marks;
    }

    /**
     * Segments {@code string} like {@link #segmentLine}, but never throws.
     *
     * @param string  the text to segment
     * @param string2 an encoding name; currently unused
     * @return the segmented text, or the empty string if segmentation failed
     *         (for example for a {@code null} input)
     */
    public String segmentData(String string, String string2) {
        try {
            return this.segmentLine(string, SEPARATOR);
        } catch (RuntimeException ignored) {
            // Deliberate best effort: callers get "" rather than an exception.
            return "";
        }
    }
}

