/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.gate;

import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.DefaultSentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.morphadorner.gate.MorphAdornerGateWrapperBase;
import edu.northwestern.at.utils.Formatters;
import gate.AnnotationSet;
import gate.DocumentContent;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.GateRuntimeException;
import gate.util.SimpleFeatureMapImpl;
import java.util.ArrayList;
import java.util.List;

/**
 * GATE processing resource that splits a document's text into sentences and
 * word tokens with MorphAdorner's default sentence splitter and word tokenizer,
 * and records the results as annotations on the document.
 *
 * <p>Token annotations use {@code baseTokenAnnotationType} and carry the
 * features {@code "string"} (the token text) and {@code "category"} (an empty
 * string). Sentence annotations use {@code baseSentenceAnnotationType} and
 * carry no features.
 */
public class TokenizerGateWrapper
extends MorphAdornerGateWrapperBase {
    /** Splits the document text into sentences; created in {@link #init()}. */
    protected SentenceSplitter sentenceSplitter;
    /** Splits text into word tokens; created in {@link #init()}. */
    protected WordTokenizer tokenizer;

    /**
     * Creates the default word tokenizer and sentence splitter and hands the
     * inherited part-of-speech guesser to the splitter.
     *
     * @return whatever the superclass {@code init()} returns
     * @throws ResourceInstantiationException if the superclass initialization fails
     */
    @Override
    public Resource init() throws ResourceInstantiationException {
        // Shared setup from the base class; must run before this.guesser is used below.
        // NOTE(review): presumably commonInit() is what creates the guesser -- confirm in the base class.
        this.commonInit();
        this.tokenizer = new DefaultWordTokenizer();
        this.sentenceSplitter = new DefaultSentenceSplitter();
        this.sentenceSplitter.setPartOfSpeechGuesser(this.guesser);
        return super.init();
    }

    /**
     * Tokenizes the current document and adds one annotation per token and one
     * annotation per non-empty sentence to the output annotation set.
     *
     * <p>The output set is the document's default annotation set when
     * {@code outputASName} is {@code null} or empty (an empty name is reset to
     * {@code null}); otherwise it is the set with that name. Progress is
     * reported to listeners as the percentage of sentences processed so far.
     *
     * @throws ExecutionException wrapping any exception raised while processing,
     *         including the {@link GateRuntimeException} raised when no document is set
     */
    @Override
    public void execute() throws ExecutionException {
        try {
            if (this.document == null) {
                throw new GateRuntimeException("No document to process!");
            }
            // Fetch the content once; it is used both as a plain string and for sub-range extraction.
            DocumentContent docContent = this.document.getContent();
            String content = docContent.toString();
            this.fireStatusChanged("Tokenizing " + this.document.getName());
            this.fireProgressChanged(0);

            List<List<String>> sentences = this.sentenceSplitter.extractSentences(content, this.tokenizer);
            // Indexed below at [i] and [i + 1], so the splitter is expected to return
            // one more offset than there are sentences.
            int[] sentenceOffsets = this.sentenceSplitter.findSentenceOffsets(content, sentences);
            int sentenceCount = sentences.size();
            this.fireStatusChanged("Extracted " + Formatters.formatIntegerWithCommas(sentenceCount) + " sentences");
            this.fireProgressChanged(0);

            // NOTE(review): inputAS is never read. The lookup is kept because
            // getAnnotations(...) may create the annotation set as a side effect --
            // confirm against the GATE Document API before removing it.
            AnnotationSet inputAS = this.inputASName == null
                    ? this.document.getAnnotations()
                    : this.document.getAnnotations(this.inputASName);

            // Treat an empty output set name the same as "no name": use the default set.
            if (this.outputASName != null && this.outputASName.length() == 0) {
                this.outputASName = null;
            }
            AnnotationSet outputAS = this.outputASName == null
                    ? this.document.getAnnotations()
                    : this.document.getAnnotations(this.outputASName);

            for (int sentenceNumber = 0; sentenceNumber < sentenceCount; ++sentenceNumber) {
                long sentenceStart = sentenceOffsets[sentenceNumber];
                long sentenceEnd = sentenceOffsets[sentenceNumber + 1];
                DocumentContent sentenceContent = docContent.getContent(Long.valueOf(sentenceStart), Long.valueOf(sentenceEnd));
                String sentenceText = sentenceContent.toString();
                List<String> sentenceTokens = sentences.get(sentenceNumber);
                // Token offsets are relative to the start of the sentence text.
                int[] tokenOffsets = this.tokenizer.findWordOffsets(sentenceText, sentenceTokens);

                // First compute every token's document-level span ...
                List<TokenAnnotation> annotationSpans = new ArrayList<TokenAnnotation>(sentenceTokens.size());
                for (int tokenNumber = 0; tokenNumber < sentenceTokens.size(); ++tokenNumber) {
                    String tokenText = sentenceTokens.get(tokenNumber);
                    long tokenStart = tokenOffsets[tokenNumber];
                    long tokenEnd = tokenStart + (long)tokenText.length();
                    TokenAnnotation tokenAnnotation = new TokenAnnotation();
                    // Shift sentence-relative offsets to document offsets.
                    tokenAnnotation.start = tokenStart + sentenceStart;
                    tokenAnnotation.end = tokenEnd + sentenceStart;
                    tokenAnnotation.string = tokenText;
                    annotationSpans.add(tokenAnnotation);
                }

                // ... then add the token annotations, followed by the sentence annotation.
                for (TokenAnnotation span : annotationSpans) {
                    SimpleFeatureMapImpl tokenFeats = new SimpleFeatureMapImpl();
                    tokenFeats.put("string", span.string);
                    tokenFeats.put("category", "");
                    outputAS.add(Long.valueOf(span.start), Long.valueOf(span.end), this.baseTokenAnnotationType, (FeatureMap)tokenFeats);
                }
                // Zero-length sentences get no sentence annotation.
                if (sentenceEnd > sentenceStart) {
                    SimpleFeatureMapImpl sentFeats = new SimpleFeatureMapImpl();
                    outputAS.add(Long.valueOf(sentenceStart), Long.valueOf(sentenceEnd), this.baseSentenceAnnotationType, (FeatureMap)sentFeats);
                }

                this.fireStatusChanged("Added sentence " + sentenceNumber);
                // Report real progress as a percentage; long arithmetic avoids int overflow
                // for very large sentence counts.
                this.fireProgressChanged((int)((sentenceNumber + 1L) * 100L / sentenceCount));
            }
        }
        catch (Exception e) {
            throw new ExecutionException((Throwable)e);
        }
    }

    /**
     * Mutable holder for one token: its start and end offsets within the
     * document and its text.
     */
    class TokenAnnotation {
        /** Document offset of the token's first character. */
        long start;
        /** Document offset just past the token's last character. */
        long end;
        /** The token text. */
        String string;

        TokenAnnotation() {
        }
    }
}

