/*
 * Decompiled with CFR 0.152.
 */
package gate.creole.tokeniser.chinesetokeniser;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.ProcessingResource;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.tokeniser.SimpleTokeniser;
import gate.creole.tokeniser.chinesetokeniser.Segmenter;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;

/**
 * Language analyser that tokenises a document by optionally running a
 * {@link Segmenter} over its text, tokenising the resulting text with a
 * {@link SimpleTokeniser} on a hidden temporary document, and then copying the
 * produced annotations back onto the original document with their offsets
 * shifted by the number of segmenter marks that precede them.
 *
 * <p>NOTE(review): the marks returned by {@code Segmenter.getMarks()} are
 * presumably the offsets of separator characters the segmenter inserted into
 * the text; confirm against the Segmenter implementation.
 */
public class ChineseTokeniser
extends AbstractLanguageAnalyser
implements ProcessingResource {
    /** Encoding names accepted by {@link #init()}. */
    private static final String ENCODING_BIG5 = "BIG5";
    private static final String ENCODING_GBK = "GBK";
    private static final String ENCODING_UTF8 = "UTF8";

    /** Character-form codes handed to the {@link Segmenter} constructor. */
    private static final int CHARFORM_BIG5 = 0;
    private static final int CHARFORM_GBK = 1;
    private static final int CHARFORM_UTF8 = 2;

    private Segmenter segmenter;
    private String encoding;
    private Document document;
    /** Hidden document holding the (possibly segmented) text; only set while execute() runs. */
    private Document tempDoc;
    /** Tokeniser run over {@link #tempDoc}; only set while execute() runs. */
    private SimpleTokeniser tokeniser;
    private Boolean runSegmenter;
    private Boolean generateSpaceTokens;
    private URL rulesURL;
    private String annotationSetName;
    private int charform;

    /**
     * Resolves the configured encoding to a segmenter character form and
     * creates the segmenter.
     *
     * <p>"BIG5" and "GBK" are kept as given; a missing or any other encoding
     * falls back to "UTF8". The character form is always set consistently
     * with the resulting encoding.
     *
     * @return this resource
     * @throws ResourceInstantiationException if no rules URL has been set
     */
    public Resource init() throws ResourceInstantiationException {
        this.fireProgressChanged(0);
        this.fireStatusChanged("Loading Data Files...");
        if (ENCODING_BIG5.equals(this.encoding)) {
            this.charform = CHARFORM_BIG5;
        } else if (ENCODING_GBK.equals(this.encoding)) {
            this.charform = CHARFORM_GBK;
        } else {
            // Covers null, "UTF8" and every unrecognised name. A null encoding
            // previously left charform at its int default (0 == BIG5) while
            // the encoding was reported as UTF8.
            this.encoding = ENCODING_UTF8;
            this.charform = CHARFORM_UTF8;
        }
        if (this.rulesURL == null) {
            throw new ResourceInstantiationException("No URL provided for the tokeniser rules");
        }
        this.segmenter = new Segmenter(this.charform, true);
        this.fireProcessFinished();
        return this;
    }

    /**
     * Re-initialises the resource by running {@link #init()} again, so that a
     * changed encoding is reflected in the character form of the new segmenter.
     *
     * @throws ResourceInstantiationException if {@link #init()} fails
     */
    public void reInit() throws ResourceInstantiationException {
        this.init();
    }

    /**
     * Tokenises the current document.
     *
     * <p>A {@code null} {@code runSegmenter} or {@code generateSpaceTokens}
     * flag is treated as {@code false}. The temporary document and the
     * tokeniser created for the run are always deleted before this method
     * returns, whether it completes normally or throws.
     *
     * @throws GateRuntimeException if no document has been set
     * @throws ExecutionException if the temporary resources cannot be created,
     *         the tokeniser fails, or an annotation cannot be added to the
     *         original document
     */
    public void execute() throws ExecutionException {
        this.fireProgressChanged(0);
        if (this.document == null) {
            throw new GateRuntimeException("No document to process!");
        }
        boolean segment = Boolean.TRUE.equals(this.runSegmenter);
        boolean spaceTokens = Boolean.TRUE.equals(this.generateSpaceTokens);

        String originalText = this.document.getContent().toString();
        String text = segment ? this.segmenter.segmentData(originalText, this.encoding) : originalText;
        long[] marks = segment ? this.sortedMarks() : null;

        // The tokeniser is given "UTF-8" where this resource uses "UTF8".
        // Keep the translation local: writing it back into the field would
        // change the encoding passed to the segmenter on every later run.
        String tokeniserEncoding = ENCODING_UTF8.equals(this.encoding) ? "UTF-8" : this.encoding;

        try {
            this.tempDoc = this.createTempDocument(text);
            this.tokeniser = this.createTokeniser(tokeniserEncoding);
            this.tokeniser.execute();
            this.copyAnnotations(marks, spaceTokens);
        }
        finally {
            this.releaseTempResources();
        }
        this.fireProcessFinished();
    }

    /** Returns the segmenter's marks as an ascending array of offsets. */
    private long[] sortedMarks() {
        ArrayList<?> marks = (ArrayList<?>)this.segmenter.getMarks();
        long[] offsets = new long[marks.size()];
        for (int i = 0; i < offsets.length; ++i) {
            offsets[i] = ((Long)marks.get(i)).longValue();
        }
        Arrays.sort(offsets);
        return offsets;
    }

    /** Creates the hidden temporary document whose content is the given text. */
    private Document createTempDocument(String text) throws ExecutionException {
        FeatureMap params = Factory.newFeatureMap();
        params.put("stringContent", text);
        FeatureMap hidden = Factory.newFeatureMap();
        Gate.setHiddenAttribute(hidden, true);
        try {
            return (Document)Factory.createResource("gate.corpora.DocumentImpl", params, hidden);
        }
        catch (ResourceInstantiationException e) {
            throw ChineseTokeniser.failure("Temporary document cannot be created", e);
        }
    }

    /** Creates the hidden SimpleTokeniser that will run over the temporary document. */
    private SimpleTokeniser createTokeniser(String tokeniserEncoding) throws ExecutionException {
        FeatureMap hidden = Factory.newFeatureMap();
        Gate.setHiddenAttribute(hidden, true);
        FeatureMap params = Factory.newFeatureMap();
        params.put("rulesURL", this.rulesURL);
        params.put("encoding", tokeniserEncoding);
        params.put("document", this.tempDoc);
        params.put("annotationSetName", this.annotationSetName);
        try {
            return (SimpleTokeniser)Factory.createResource("gate.creole.tokeniser.SimpleTokeniser", params, hidden);
        }
        catch (ResourceInstantiationException e) {
            throw ChineseTokeniser.failure("Instance of SimpleTokeniser cannot be created", e);
        }
    }

    /**
     * Copies the temporary document's annotations onto the original document,
     * in offset order, shifting each one left by the number of marks before it.
     *
     * @param marks ascending mark offsets, or {@code null} when the segmenter was not run
     * @param spaceTokens whether an annotation starting exactly on a mark is
     *        replaced by a zero-length "SpaceToken" (otherwise it is dropped)
     */
    private void copyAnnotations(long[] marks, boolean spaceTokens) throws ExecutionException {
        AnnotationSet source;
        AnnotationSet target;
        if (this.annotationSetName == null || this.annotationSetName.length() == 0) {
            source = this.tempDoc.getAnnotations();
            target = this.document.getAnnotations();
        } else {
            source = this.tempDoc.getAnnotations(this.annotationSetName);
            target = this.document.getAnnotations(this.annotationSetName);
        }
        ArrayList<Annotation> ordered = new ArrayList<Annotation>(source.get());
        Collections.sort(ordered, new OffsetComparator());

        for (Annotation annotation : ordered) {
            long start = annotation.getStartNode().getOffset();
            long end = annotation.getEndNode().getOffset();
            int hit = marks == null ? -1 : Arrays.binarySearch(marks, start);
            if (hit >= 0) {
                // The annotation starts on a mark; 'hit' marks lie before it.
                if (spaceTokens) {
                    long position = start - (long)hit;
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "ChineseSplit");
                    try {
                        target.add(Long.valueOf(position), Long.valueOf(position), "SpaceToken", features);
                    }
                    catch (InvalidOffsetException e) {
                        throw ChineseTokeniser.failure("Offset Error", e);
                    }
                }
                continue;
            }
            // A negative result encodes the insertion point as -(point) - 1,
            // i.e. the count of marks located before this annotation.
            long shift = -hit - 1;
            try {
                target.add(Long.valueOf(start - shift), Long.valueOf(end - shift), annotation.getType(), annotation.getFeatures());
            }
            catch (InvalidOffsetException e) {
                throw ChineseTokeniser.failure("Problem with the invalid offset while adding annotationsto the original document", e);
            }
        }
    }

    /** Deletes the per-run tokeniser and temporary document, if they were created. */
    private void releaseTempResources() {
        if (this.tokeniser != null) {
            Factory.deleteResource(this.tokeniser);
            this.tokeniser = null;
        }
        if (this.tempDoc != null) {
            Factory.deleteResource(this.tempDoc);
            this.tempDoc = null;
        }
    }

    /** Builds an ExecutionException with the given message that keeps the underlying cause. */
    private static ExecutionException failure(String message, Throwable cause) {
        ExecutionException failure = new ExecutionException(message);
        failure.initCause(cause);
        return failure;
    }

    /** Sets whether the segmenter is run before tokenising. */
    public void setRunSegmenter(Boolean bl) {
        this.runSegmenter = bl;
    }

    /** Returns whether the segmenter is run before tokenising. */
    public Boolean getRunSegmenter() {
        return this.runSegmenter;
    }

    /** Sets whether "SpaceToken" annotations are generated at segmenter marks. */
    public void setGenerateSpaceTokens(Boolean bl) {
        this.generateSpaceTokens = bl;
    }

    /** Returns whether "SpaceToken" annotations are generated at segmenter marks. */
    public Boolean getGenerateSpaceTokens() {
        return this.generateSpaceTokens;
    }

    /** Sets the document to be processed by {@link #execute()}. */
    public void setDocument(Document document) {
        this.document = document;
    }

    /** Returns the document to be processed by {@link #execute()}. */
    public Document getDocument() {
        return this.document;
    }

    /** Sets the encoding name; it is resolved to a character form by {@link #init()}. */
    public void setEncoding(String string) {
        this.encoding = string;
    }

    /** Returns the encoding name. */
    public String getEncoding() {
        return this.encoding;
    }

    /** Sets the URL of the rules passed to the SimpleTokeniser. */
    public void setRulesURL(URL uRL) {
        this.rulesURL = uRL;
    }

    /** Returns the URL of the rules passed to the SimpleTokeniser. */
    public URL getRulesURL() {
        return this.rulesURL;
    }

    /** Sets the annotation set name; null or empty selects the default set. */
    public void setAnnotationSetName(String string) {
        this.annotationSetName = string;
    }

    /** Returns the annotation set name. */
    public String getAnnotationSetName() {
        return this.annotationSetName;
    }
}

