package org.apache.ctakes.utils.wiki;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.ctakes.utils.struct.CounterMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/* loaded from: input_file:WEB-INF/lib/ctakes-utils-3.2.2.jar:org/apache/ctakes/utils/wiki/WikiIndex.class */
/**
 * Wrapper around a Lucene index of Wikipedia articles that supports keyword search and a
 * tf-idf based cosine similarity between two pieces of text.
 *
 * <p>Typical usage: construct, call {@link #initialize()}, issue queries, then call
 * {@link #close()}.
 *
 * <p>This class performs no synchronization. {@link #getCosineSimilarity(String, String)}
 * mutates a one-entry cache holding the vectors of the most recent pair of texts, so an
 * instance should not be shared between threads without external locking.
 */
public class WikiIndex {
    /** Default number of hits retrieved per query; used by the no-arg constructor. */
    public static int defaultMaxHits = 10;
    /** Default index location; used by the no-arg constructor. */
    public static String defaultIndexPath = "/home/dima/i2b2/wiki-index/index_nometa";
    /** Default field that free-text queries are parsed against; used by the no-arg constructor. */
    public static String defaultSearchField = "text";

    /** Field whose term vectors feed the tf-idf computation. */
    private static final String TEXT_FIELD = "text";
    /** Field holding the article title. */
    private static final String TITLE_FIELD = "title";
    /** Field holding the redirect target title; absent (null) for regular articles. */
    private static final String REDIRECT_FIELD = "redirect";

    private final int maxHits;
    private final String indexPath;
    private final String searchField;
    private final DefaultSimilarity similarity;
    private final boolean useCache;

    private FSDirectory directory;
    private IndexReader indexReader;
    private IndexSearcher indexSearcher;
    private Analyzer standardAnalyzer;
    private QueryParser queryParser;
    private int numDocs;
    private Cache lastQuery;

    /**
     * Creates an index wrapper. No I/O happens until {@link #initialize()} is called.
     *
     * @param maxHits     maximum number of documents retrieved per query
     * @param indexPath   file-system path of the Lucene index directory
     * @param searchField document field that free-text queries are parsed against
     * @param approximate if true an {@code ApproximateSimilarity} is used for tf/idf weights,
     *                    otherwise Lucene's {@link DefaultSimilarity}
     */
    public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
        this.useCache = true;
        this.lastQuery = null;
        this.maxHits = maxHits;
        this.indexPath = indexPath;
        this.searchField = searchField;
        this.similarity = approximate ? new ApproximateSimilarity() : new DefaultSimilarity();
    }

    /**
     * Creates an index wrapper that uses {@link DefaultSimilarity}.
     *
     * @param maxHits     maximum number of documents retrieved per query
     * @param indexPath   file-system path of the Lucene index directory
     * @param searchField document field that free-text queries are parsed against
     */
    public WikiIndex(int maxHits, String indexPath, String searchField) {
        this(maxHits, indexPath, searchField, false);
    }

    /**
     * Creates an index wrapper from the current values of {@link #defaultMaxHits},
     * {@link #defaultIndexPath} and {@link #defaultSearchField}, using {@link DefaultSimilarity}.
     */
    public WikiIndex() {
        // Delegate so that 'similarity' is always set; leaving it null made
        // getCosineSimilarity() fail with a NullPointerException.
        this(defaultMaxHits, defaultIndexPath, defaultSearchField, false);
    }

    /**
     * Opens the index and builds the searcher, analyzer and query parser.
     * Calling it again releases the previously opened resources first.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read
     */
    public void initialize() throws CorruptIndexException, IOException {
        close(); // no-op on a fresh instance; avoids leaking a reader on re-initialization
        this.directory = FSDirectory.open(new File(this.indexPath));
        boolean ready = false;
        try {
            this.indexReader = IndexReader.open(this.directory);
            this.numDocs = this.indexReader.numDocs();
            this.indexSearcher = new IndexSearcher(this.indexReader);
            this.standardAnalyzer = new StandardAnalyzer(Version.LUCENE_40);
            this.queryParser = new QueryParser(Version.LUCENE_40, this.searchField, this.standardAnalyzer);
            this.lastQuery = new Cache();
            ready = true;
        } finally {
            if (!ready) {
                close(); // do not keep half-opened resources around
            }
        }
    }

    /**
     * Runs a free-text query and returns title and score of the best matching articles.
     * Hits that are redirect pages are replaced by the article they point to when it can be found.
     *
     * @param str query text; it is escaped, so query-syntax characters are taken literally
     * @return at most {@code maxHits} results in the order Lucene returned them
     * @throws ParseException        if the escaped text still cannot be parsed
     * @throws IOException           if the index cannot be read
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public ArrayList<SearchResult> search(String str) throws ParseException, IOException {
        ensureInitialized();
        ScoreDoc[] hits = this.indexSearcher
                .search(this.queryParser.parse(QueryParser.escape(str)), (Filter) null, this.maxHits)
                .scoreDocs;
        ArrayList<SearchResult> results = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            ScoreDoc resolved = handlePossibleRedirect(hit);
            String title = this.indexSearcher.doc(resolved.doc).get(TITLE_FIELD);
            results.add(new SearchResult(title, resolved.score));
        }
        return results;
    }

    /**
     * Computes the cosine similarity between the tf-idf vectors built from the articles
     * retrieved for each of the two texts.
     *
     * @param str  first text
     * @param str2 second text
     * @return the cosine of the two vectors, or {@code 0.0} when either text retrieves no
     *         documents, yields an empty vector, or has a zero norm
     * @throws ParseException        if a text cannot be parsed as a query
     * @throws IOException           if the index cannot be read
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public double getCosineSimilarity(String str, String str2) throws ParseException, IOException {
        ensureInitialized();

        HashMap<String, Double> vector1 = lookupOrBuildVector(str);
        if (vector1 == null || vector1.isEmpty()) {
            return 0.0d;
        }
        HashMap<String, Double> vector2 = lookupOrBuildVector(str2);
        if (vector2 == null || vector2.isEmpty()) {
            return 0.0d;
        }

        // Remember this pair only after both lookups, so the second lookup could still
        // hit the entries of the previous call.
        if (this.useCache) {
            this.lastQuery.t1 = str;
            this.lastQuery.v1 = vector1;
            this.lastQuery.t2 = str2;
            this.lastQuery.v2 = vector2;
        }

        double denominator = computeEuclideanNorm(vector1) * computeEuclideanNorm(vector2);
        if (denominator == 0.0d || Double.isNaN(denominator)) {
            return 0.0d; // avoid returning NaN/Infinity for degenerate vectors
        }
        return computeDotProduct(vector1, vector2) / denominator;
    }

    /**
     * Retrieves the term vectors of the {@code "text"} field for the documents matching a query.
     * Redirect hits are resolved first.
     *
     * @param str query text; it is escaped before parsing
     * @return one entry per hit; an entry is whatever {@code IndexReader.getTermVector}
     *         returned for that document and may therefore be {@code null}
     * @throws ParseException        if the escaped text still cannot be parsed
     * @throws IOException           if the index cannot be read
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public ArrayList<Terms> getTermFreqVectors(String str) throws ParseException, IOException {
        ensureInitialized();
        ScoreDoc[] hits = this.indexSearcher
                .search(this.queryParser.parse(QueryParser.escape(str)), this.maxHits)
                .scoreDocs;
        ArrayList<Terms> vectors = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            vectors.add(this.indexReader.getTermVector(handlePossibleRedirect(hit).doc, TEXT_FIELD));
        }
        return vectors;
    }

    /** Fails fast with a descriptive message instead of a NullPointerException deep inside a query. */
    private void ensureInitialized() {
        if (this.indexSearcher == null) {
            throw new IllegalStateException("WikiIndex is not initialized; call initialize() first");
        }
    }

    /**
     * Returns the tf-idf vector for a text, taken from the last-query cache when the text
     * matches one of the two cached texts and built from the index otherwise.
     *
     * @return the vector, or {@code null} when the text retrieves no documents
     */
    private HashMap<String, Double> lookupOrBuildVector(String text) throws ParseException, IOException {
        if (this.useCache) {
            if (this.lastQuery.t1 != null && this.lastQuery.t1.equals(text)) {
                return this.lastQuery.v1;
            }
            if (this.lastQuery.t2 != null && this.lastQuery.t2.equals(text)) {
                return this.lastQuery.v2;
            }
        }
        ArrayList<Terms> termVectors = getTermFreqVectors(text);
        if (termVectors.isEmpty()) {
            return null;
        }
        return makeTfIdfVector(termVectors);
    }

    /**
     * If the hit is a redirect page, looks up the article it points to by title and returns
     * that hit instead; otherwise (or when the target cannot be found) returns the hit unchanged.
     */
    private ScoreDoc handlePossibleRedirect(ScoreDoc scoreDoc) throws ParseException, CorruptIndexException, IOException {
        String redirectTarget = this.indexSearcher.doc(scoreDoc.doc).get(REDIRECT_FIELD);
        if (redirectTarget == null) {
            return scoreDoc;
        }
        // NOTE(review): escape() also escapes the surrounding double quotes, and this parser
        // uses LUCENE_30 while the main one uses LUCENE_40. Both are kept exactly as they were
        // because changing either could alter which titles match - confirm before touching.
        String quotedTitle = '\"' + redirectTarget.replaceAll("_", " ") + '\"';
        QueryParser titleParser = new QueryParser(Version.LUCENE_30, TITLE_FIELD, this.standardAnalyzer);
        ScoreDoc[] targets = this.indexSearcher
                .search(titleParser.parse(QueryParser.escape(quotedTitle)), (Filter) null, 1)
                .scoreDocs;
        if (targets.length >= 1) {
            return targets[0];
        }
        // Report source title -> redirect target (the message used to print the target twice).
        String sourceTitle = this.indexSearcher.doc(scoreDoc.doc).get(TITLE_FIELD);
        System.out.println("failed redirect: " + sourceTitle + " -> " + redirectTarget);
        return scoreDoc;
    }

    /**
     * Builds one tf-idf vector from a set of per-document term vectors.
     * The "term frequency" of a term is the number of given documents whose term vector
     * contains it; the document frequency is read from the {@code "text"} field of the index.
     *
     * @param arrayList per-document term vectors; {@code null} entries are skipped
     * @return map from term to tf-idf weight; empty when no terms were found
     */
    private HashMap<String, Double> makeTfIdfVector(ArrayList<Terms> arrayList) throws IOException {
        CounterMap<String> counts = new CounterMap<String>();
        for (Terms terms : arrayList) {
            if (terms == null) {
                continue; // no term vector for this document
            }
            TermsEnum termsEnum = terms.iterator(null);
            while (termsEnum.next() != null) {
                counts.add(termsEnum.term().utf8ToString());
            }
        }

        // Weights are computed once over the final counts. Doing this inside the loop above
        // produced the same map but repeated the docFreq lookups for every document.
        HashMap<String, Double> tfIdf = new HashMap<>();
        for (String term : counts.keySet()) {
            float tf = this.similarity.tf(counts.get(term).intValue());
            float idf = this.similarity.idf(this.indexReader.docFreq(new Term(TEXT_FIELD, term)), this.numDocs);
            tfIdf.put(term, Double.valueOf(tf * idf));
        }
        return tfIdf;
    }

    /** Returns the square root (via {@code ApproximateMath.asqrt}) of the sum of squared weights. */
    private double computeEuclideanNorm(HashMap<String, Double> hashMap) {
        double sumOfSquares = 0.0d;
        for (Double weight : hashMap.values()) {
            double w = weight.doubleValue();
            sumOfSquares += w * w;
        }
        return ApproximateMath.asqrt(sumOfSquares);
    }

    /** Returns the dot product of two sparse vectors; terms missing from either one contribute 0. */
    private double computeDotProduct(HashMap<String, Double> hashMap, HashMap<String, Double> hashMap2) {
        // Iterate over the smaller map and probe the larger one.
        HashMap<String, Double> smaller = hashMap.size() > hashMap2.size() ? hashMap2 : hashMap;
        HashMap<String, Double> larger = smaller == hashMap ? hashMap2 : hashMap;
        double sum = 0.0d;
        for (java.util.Map.Entry<String, Double> entry : smaller.entrySet()) {
            Double other = larger.get(entry.getKey());
            if (other != null) {
                sum += entry.getValue().doubleValue() * other.doubleValue();
            }
        }
        return sum;
    }

    /**
     * Returns the element-wise sum of two sparse vectors. Terms present in only one vector keep
     * their weight (previously they were dropped, which is not vector addition).
     * Currently not called from within this class.
     */
    private HashMap<String, Double> addVectors(HashMap<String, Double> hashMap, HashMap<String, Double> hashMap2) {
        HashMap<String, Double> sum = new HashMap<>(hashMap);
        for (java.util.Map.Entry<String, Double> entry : hashMap2.entrySet()) {
            Double existing = sum.get(entry.getKey());
            double value = entry.getValue().doubleValue();
            sum.put(entry.getKey(), Double.valueOf(existing == null ? value : existing.doubleValue() + value));
        }
        return sum;
    }

    /**
     * Releases the index reader, the analyzer and the underlying directory.
     * Safe to call when {@link #initialize()} was never called and safe to call repeatedly;
     * afterwards the instance must be initialized again before it is queried.
     *
     * @throws IOException if closing the reader or the directory fails
     */
    public void close() throws IOException {
        IndexReader reader = this.indexReader;
        Analyzer analyzer = this.standardAnalyzer;
        FSDirectory dir = this.directory;
        // Drop the references first so the instance is consistently "uninitialized"
        // even if one of the close() calls below throws.
        this.indexReader = null;
        this.indexSearcher = null;
        this.standardAnalyzer = null;
        this.queryParser = null;
        this.directory = null;
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            try {
                if (analyzer != null) {
                    analyzer.close();
                }
            } finally {
                if (dir != null) {
                    dir.close();
                }
            }
        }
    }
}
