/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.ontonotes;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringUtil;

public class OntoNotesNameSampleStream
extends FilterObjectStream<String, NameSample> {
    private static final String TAG_DOC_OPEN = "<DOC";
    private static final String TAG_DOC_CLOSE = "</DOC>";
    private static final String TAG_ENAMEX_OPEN = "<ENAMEX";
    private static final String TAG_ENAMEX_CLOSE = "</ENAMEX>";
    private static final String TYPE = "TYPE=\"";
    private static final String SYMBOL_CLOSE = ">";
    private static final String SYMBOL_OPEN = "<";
    private final Map<String, String> tokenConversionMap;
    private final List<NameSample> nameSamples = new LinkedList<NameSample>();

    public OntoNotesNameSampleStream(ObjectStream<String> samples) {
        super(samples);
        HashMap<String, String> tokenConversionMap = new HashMap<String, String>();
        tokenConversionMap.put("-LRB-", "(");
        tokenConversionMap.put("-RRB-", ")");
        tokenConversionMap.put("-LSB-", "[");
        tokenConversionMap.put("-RSB-", "]");
        tokenConversionMap.put("-LCB-", "{");
        tokenConversionMap.put("-RCB-", "}");
        tokenConversionMap.put("-AMP-", "&");
        this.tokenConversionMap = Collections.unmodifiableMap(tokenConversionMap);
    }

    private String convertToken(String token) {
        String cleanedToken;
        StringBuilder convertedToken = new StringBuilder(token);
        int startTagEndIndex = convertedToken.indexOf(SYMBOL_CLOSE);
        if (token.contains("=\"") && startTagEndIndex != -1) {
            convertedToken.delete(0, startTagEndIndex + 1);
        }
        int endTagBeginIndex = convertedToken.indexOf(SYMBOL_OPEN);
        int endTagEndIndex = convertedToken.indexOf(SYMBOL_CLOSE);
        if (endTagBeginIndex != -1 && endTagEndIndex != -1) {
            convertedToken.delete(endTagBeginIndex, endTagEndIndex + 1);
        }
        if (this.tokenConversionMap.get(cleanedToken = convertedToken.toString()) != null) {
            cleanedToken = this.tokenConversionMap.get(cleanedToken);
        }
        return cleanedToken;
    }

    public NameSample read() throws IOException {
        String doc;
        if (this.nameSamples.isEmpty() && (doc = (String)this.samples.read()) != null) {
            boolean clearAdaptiveData = true;
            try (BufferedReader docIn = new BufferedReader(new StringReader(doc));){
                String line;
                while ((line = docIn.readLine()) != null) {
                    if (line.startsWith(TAG_DOC_OPEN)) continue;
                    if (line.equals(TAG_DOC_CLOSE)) {
                        break;
                    }
                    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
                    LinkedList<Span> entities = new LinkedList<Span>();
                    ArrayList<String> cleanedTokens = new ArrayList<String>(tokens.length);
                    int tokenIndex = 0;
                    int entityBeginIndex = -1;
                    String entityType = null;
                    boolean insideStartEnmaxTag = false;
                    for (String token : tokens) {
                        if (token.startsWith(TAG_ENAMEX_OPEN)) {
                            insideStartEnmaxTag = true;
                            continue;
                        }
                        if (insideStartEnmaxTag) {
                            String typeBegin = TYPE;
                            if (token.startsWith(typeBegin)) {
                                int typeEnd = token.indexOf("\"", typeBegin.length());
                                entityType = StringUtil.toLowerCase((CharSequence)token.substring(typeBegin.length(), typeEnd));
                            }
                            if (!token.contains(SYMBOL_CLOSE)) continue;
                            entityBeginIndex = tokenIndex;
                            insideStartEnmaxTag = false;
                        }
                        if (token.endsWith(TAG_ENAMEX_CLOSE)) {
                            entities.add(new Span(entityBeginIndex, tokenIndex + 1, entityType));
                            entityBeginIndex = -1;
                        }
                        cleanedTokens.add(this.convertToken(token));
                        ++tokenIndex;
                    }
                    this.nameSamples.add(new NameSample(cleanedTokens.toArray(new String[0]), entities.toArray(new Span[0]), clearAdaptiveData));
                    clearAdaptiveData = false;
                }
            }
        }
        if (!this.nameSamples.isEmpty()) {
            return this.nameSamples.remove(0);
        }
        return null;
    }
}

