package org.exist.xquery.modules.ngram;

import de.betterform.agent.web.WebProcessor;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.util.XMLConstants;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.xpath.compiler.Keywords;
import org.exist.dom.QName;
import org.exist.dom.persistent.DocumentSet;
import org.exist.dom.persistent.EmptyNodeSet;
import org.exist.dom.persistent.Match;
import org.exist.dom.persistent.NodeProxy;
import org.exist.dom.persistent.NodeSet;
import org.exist.indexing.ngram.NGramIndex;
import org.exist.indexing.ngram.NGramIndexWorker;
import org.exist.xquery.AnalyzeContextInfo;
import org.exist.xquery.Atomize;
import org.exist.xquery.BasicExpressionVisitor;
import org.exist.xquery.Dependency;
import org.exist.xquery.DynamicCardinalityCheck;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Expression;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.LocationStep;
import org.exist.xquery.NodeTest;
import org.exist.xquery.Optimizable;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.modules.ngram.query.AlternativeStrings;
import org.exist.xquery.modules.ngram.query.EmptyExpression;
import org.exist.xquery.modules.ngram.query.EndAnchor;
import org.exist.xquery.modules.ngram.query.EvaluatableExpression;
import org.exist.xquery.modules.ngram.query.FixedString;
import org.exist.xquery.modules.ngram.query.StartAnchor;
import org.exist.xquery.modules.ngram.query.Wildcard;
import org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence;
import org.exist.xquery.modules.ngram.utils.NodeProxies;
import org.exist.xquery.modules.ngram.utils.NodeSets;
import org.exist.xquery.util.Error;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.slf4j.Marker;

/* loaded from: input_file:WEB-INF/lib/exist-index-ngram.jar:org/exist/xquery/modules/ngram/NGramSearch.class */
public class NGramSearch extends Function implements Optimizable {
    /**
     * Regular expression for an interval qualifier: an opening brace, two runs of decimal
     * digits separated by a comma, and a closing brace. The two numbers are captured as
     * groups 1 and 2.
     */
    private static final String INTERVAL_QUALIFIER_PATTERN = "\\{([0-9]+),([0-9]+)\\}";
    /** Description text shared by the search functions; not referenced by name in the visible code. */
    private static final String SEARCH_DESCRIPTION = "Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.";
    /** Location step chosen by {@code analyze}; {@code eval} hands the preselected nodes to it. */
    private LocationStep contextStep;
    /** Node name derived in {@code analyze}; a non-null value is what makes {@code canOptimize} return true. */
    protected QName contextQName;
    /** Axis reported by {@code getOptimizeAxis}; stays -1 until {@code analyze} finds a usable step. */
    protected int axis;
    /** Result of the last {@code preSelect} call; when non-null, {@code eval} reuses it instead of searching again. */
    private NodeSet preselectResult;
    /** Set by {@code analyze} when the node argument is a single self step inside a named context step. */
    protected boolean optimizeSelf;
    /** Set by {@code analyze} when the node argument is a single step on axis 5 or 6. */
    protected boolean optimizeChild;
    /** Class-wide logger. */
    protected static final Logger LOG = LogManager.getLogger((Class<?>) NGramSearch.class);
    private static final String WILDCARD_PATTERN_DESCRIPTION = "The string to search for.A full stop, '.', (not between brackets), without any qualifiers: Matches a single arbitrary character.A full stop, '.', (not between brackets), immediately followed by a single question mark, '?': Matches either no characters or one character.A full stop, '.', (not between brackets), immediately followed by a single asterisk, '*': Matches zero or more characters.A full stop, '.', (not between brackets), immediately followed by a single plus sign, '+': Matches one or more characters.A full stop, '.', immediately followed by a sequence of characters that matches the regular expression {[0-9]+,[0-9]+}: Matches a number of characters, where the number is no less than the number represented by the series of digits before the comma, and no greater than the number represented by the series of digits following the comma.An  expression  \"[…]\"  matches a single character, namely any of the charactersenclosed by the brackets.  The string enclosed by the brackets cannot be empty; therefore ']' can be allowed between  the brackets, provided that it is the first character.(Thus, \"[][?]\" matches the three characters '[', ']' and '?'.)A circumflex accent, '^', at the start of the search string matches the start of the element content.A dollar sign, '$', at the end of the search string matches the end of the element content.One can remove the special meaning of any character mentioned above by preceding them by a backslash.Between brackets these characters stand for themselves.  Thus, \"[[?*\\]\" matchesthe four characters '[', '?', '*' and '\\'.'?', '*', '+' and character sequences matching the regular expression {[0-9]+,[0-9]+} not immediately preceeded by an unescaped period, '.', stand for themselves.'^' and '$' not at the very beginning or end of the search string, respectively, stand for themselves.";
    public static final FunctionSignature[] signatures = {new FunctionSignature(new QName(Keywords.FUNC_CONTAINS_STRING, NGramModule.NAMESPACE_URI, NGramModule.PREFIX), "Similar to the standard XQuery fn:contains function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string may appear at any position within the node content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType(WebProcessor.QUERY_STRING, 22, 3, "The exact string to search for")}, new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes containing the query string or the empty sequence")), new FunctionSignature(new QName("ends-with", NGramModule.NAMESPACE_URI, NGramModule.PREFIX), "Similar to the standard XQuery fn:ends-with function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string has to appear at the end of the node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType(WebProcessor.QUERY_STRING, 22, 3, "The exact string to search for")}, new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes ending with the query string or the empty sequence")), new FunctionSignature(new QName(Keywords.FUNC_STARTS_WITH_STRING, NGramModule.NAMESPACE_URI, NGramModule.PREFIX), "Similar to the standard XQuery fn:starts-with function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. 
Nodes need to have an ngram index to be searched.The string has to appear at the start of the node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType(WebProcessor.QUERY_STRING, 22, 3, "The exact string to search for")}, new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes starting with the query string or the empty sequence")), new FunctionSignature(new QName("wildcard-contains", NGramModule.NAMESPACE_URI, NGramModule.PREFIX), "Similar to the standard XQuery fn:matches function, but based on the NGram index and allowing wildcards in the query string. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string has to match the whole node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType(WebProcessor.QUERY_STRING, 22, 3, WILDCARD_PATTERN_DESCRIPTION)}, new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes matching the query string or the empty sequence"))};

    /**
     * Creates the function instance for one of the n-gram search signatures.
     *
     * @param xQueryContext the query context, passed to the superclass
     * @param functionSignature the signature this instance implements, passed to the superclass
     */
    public NGramSearch(XQueryContext xQueryContext, FunctionSignature functionSignature) {
        super(xQueryContext, functionSignature);
        // Optimizer state starts out "nothing analysed, nothing preselected".
        contextStep = null;
        contextQName = null;
        preselectResult = null;
        axis = -1;
        optimizeSelf = false;
        optimizeChild = false;
    }

    /**
     * Installs the two function arguments as evaluation steps.
     * <p>
     * The first argument (the node set) is stored unchanged. The second argument (the query
     * string) is wrapped in a dynamic cardinality check and, unless its static type is
     * already a subtype of type code 20, additionally atomized.
     *
     * @param list the argument expressions; index 0 is the node set, index 1 the query string
     * @throws XPathException declared by the overridden method
     */
    @Override
    public void setArguments(List<Expression> list) throws XPathException {
        this.steps.clear();
        this.steps.add(list.get(0));

        // NOTE(review): 3 is presumably Cardinality.ZERO_OR_ONE and 20 Type.ATOMIC - confirm.
        final Error cardinalityError = new Error(Error.FUNC_PARAM_CARDINALITY, "2", this.mySignature);
        final Expression checkedQuery = new DynamicCardinalityCheck(this.context, 3, list.get(1), cardinalityError);

        final boolean alreadyAtomic = Type.subTypeOf(checkedQuery.returnsType(), 20);
        final Expression queryStep = alreadyAtomic ? checkedQuery : new Atomize(this.context, checkedQuery);
        this.steps.add(queryStep);
    }

    /**
     * Performs static analysis and works out whether the index lookup can be optimized.
     * <p>
     * The location steps of the first argument are inspected. Two situations set
     * {@link #contextQName} (and thereby enable optimization):
     * <ul>
     *   <li>the argument is a single step on axis 12 and the surrounding context step is a
     *       location step with a named, non-wildcard node test: the name and axis come from
     *       the surrounding step and {@link #optimizeSelf} is set;</li>
     *   <li>otherwise, the last step has a named, non-wildcard node test: the name comes from
     *       the last step, the axis from the first step, and {@link #optimizeChild} is set when
     *       there is exactly one step on axis 5 or 6.</li>
     * </ul>
     * In every other case the optimizer fields are left untouched.
     *
     * @param analyzeContextInfo analysis context, also consulted for the surrounding context step
     * @throws XPathException if the superclass analysis fails
     */
    @Override
    public void analyze(AnalyzeContextInfo analyzeContextInfo) throws XPathException {
        super.analyze(analyzeContextInfo);

        final List<LocationStep> pathSteps = BasicExpressionVisitor.findLocationSteps(getArgument(0));
        if (pathSteps.isEmpty()) {
            return;
        }
        final int stepCount = pathSteps.size();
        final LocationStep firstStep = pathSteps.get(0);
        final LocationStep lastStep = pathSteps.get(stepCount - 1);

        // NOTE(review): axis codes 12, 5, 6 and 13 are presumably self, child, attribute and
        // descendant-attribute; byte 1 presumably marks an attribute name - confirm.
        final boolean singleSelfStep = firstStep != null && stepCount == 1 && firstStep.getAxis() == 12;

        if (singleSelfStep) {
            // The argument is just "."; take name and axis from the surrounding context step.
            final Expression outer = analyzeContextInfo.getContextStep();
            if (!(outer instanceof LocationStep)) {
                return;
            }
            final LocationStep outerStep = (LocationStep) outer;
            final NodeTest outerTest = outerStep.getTest();
            if (outerTest.isWildcardTest() || outerTest.getName() == null) {
                return;
            }
            final boolean outerOnAttribute = outerStep.getAxis() == 6 || outerStep.getAxis() == 13;
            this.contextQName = outerOnAttribute
                    ? new QName(outerTest.getName(), (byte) 1)
                    : new QName(outerTest.getName());
            this.contextStep = firstStep;
            this.axis = outerStep.getAxis();
            this.optimizeSelf = true;
            return;
        }

        if (firstStep == null || lastStep == null) {
            return;
        }
        final NodeTest lastTest = lastStep.getTest();
        if (lastTest.isWildcardTest() || lastTest.getName() == null) {
            return;
        }
        final boolean lastOnAttribute = lastStep.getAxis() == 6 || lastStep.getAxis() == 13;
        this.contextQName = lastOnAttribute
                ? new QName(lastTest.getName(), (byte) 1)
                : new QName(lastTest.getName());
        this.axis = firstStep.getAxis();
        this.optimizeChild = stepCount == 1 && (this.axis == 5 || this.axis == 6);
        this.contextStep = lastStep;
    }

    /**
     * Reports whether an index-based preselection is possible.
     *
     * @param sequence the context sequence (not consulted)
     * @return {@code true} once a context QName has been determined
     */
    @Override
    public boolean canOptimize(Sequence sequence) {
        return contextQName != null;
    }

    /**
     * @return the current value of the {@code optimizeSelf} flag
     */
    @Override
    public boolean optimizeOnSelf() {
        return optimizeSelf;
    }

    /**
     * @return the current value of the {@code optimizeChild} flag
     */
    @Override
    public boolean optimizeOnChild() {
        return optimizeChild;
    }

    /**
     * @return the current value of the {@code axis} field; -1 if none has been recorded
     */
    @Override
    public int getOptimizeAxis() {
        return axis;
    }

    /**
     * Runs the index search ahead of normal evaluation and remembers the result so that
     * {@link #eval(Sequence, Item)} can reuse it.
     * <p>
     * The search is restricted to {@link #contextQName}.
     *
     * @param sequence the context sequence; supplies the document set and the context for
     *                 evaluating the query-string argument
     * @param z when {@code true}, the nodes of {@code sequence} are passed to the search as the
     *          context node set; otherwise no context node set is passed
     * @return the preselected node set (also stored for the next {@code eval} call)
     * @throws XPathException if evaluating the query argument or the search fails
     */
    @Override
    public NodeSet preSelect(Sequence sequence, boolean z) throws XPathException {
        this.preselectResult = null;
        final long started = System.currentTimeMillis();

        final NGramIndexWorker worker = (NGramIndexWorker) this.context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
        final DocumentSet docs = sequence.getDocumentSet();
        final String query = getArgument(1).eval(sequence).getStringValue();

        // Typed list instead of a raw ArrayList: avoids an unchecked conversion to List<QName>.
        final List<QName> qnames = new ArrayList<>(1);
        qnames.add(this.contextQName);

        final NodeSet contextNodes = z ? sequence.toNodeSet() : null;
        this.preselectResult = processMatches(worker, docs, qnames, query, contextNodes, 1);

        if (this.context.getProfiler().traceFunctions()) {
            final long elapsed = System.currentTimeMillis() - started;
            this.context.getProfiler().traceIndexUsage(this.context, NGramModule.PREFIX, this, 2, elapsed);
        }
        return this.preselectResult;
    }

    /**
     * Evaluates the search.
     * <p>
     * If {@link #preSelect(Sequence, boolean)} has stored a result, that result is handed to
     * the recorded context step as preloaded data and the node argument is evaluated against
     * it. Otherwise the node argument is evaluated first and, unless it is empty, the index is
     * searched with those nodes as context.
     *
     * @param sequence the context sequence; replaced by {@code item.toSequence()} when
     *                 {@code item} is non-null
     * @param item the context item, may be {@code null}
     * @return the matching nodes; the shared empty set if the node argument evaluates to nothing
     * @throws XPathException if argument evaluation or the index search fails
     */
    @Override
    public Sequence eval(Sequence sequence, Item item) throws XPathException {
        if (item != null) {
            sequence = item.toSequence();
        }

        if (this.preselectResult != null) {
            // Optimized path: reuse what preSelect() already found.
            this.contextStep.setPreloadedData(sequence.getDocumentSet(), this.preselectResult);
            return getArgument(0).eval(sequence).toNodeSet();
        }

        final Sequence input = getArgument(0).eval(sequence, item);
        if (input.isEmpty()) {
            return NodeSet.EMPTY_SET;
        }

        final long started = System.currentTimeMillis();
        final NodeSet inputNodes = input.toNodeSet();
        final DocumentSet docs = inputNodes.getDocumentSet();
        final NGramIndexWorker worker = (NGramIndexWorker) this.context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
        final String query = getArgument(1).eval(sequence, item).getStringValue();

        // A null list means "no QName restriction"; typed instead of a raw ArrayList.
        List<QName> qnames = null;
        if (this.contextQName != null) {
            qnames = new ArrayList<>(1);
            qnames.add(this.contextQName);
        }

        final NodeSet result = processMatches(worker, docs, qnames, query, inputNodes, 0);
        if (this.context.getProfiler().traceFunctions()) {
            final long elapsed = System.currentTimeMillis() - started;
            this.context.getProfiler().traceIndexUsage(this.context, NGramModule.PREFIX, this, 1, elapsed);
        }
        return result;
    }

    /**
     * @return the local part of this function's name, as taken from its signature
     */
    private String getLocalName() {
        final QName functionName = getSignature().getName();
        return functionName.getLocalPart();
    }

    /**
     * Builds the query expression for this function variant, evaluates it against the index
     * and post-processes the result.
     * <p>
     * For {@code wildcard-contains} the query string is parsed as a wildcard pattern; for every
     * other variant it is searched as a fixed string. Functions whose local name starts with
     * {@code starts-with} or {@code ends-with} additionally keep only nodes matching at the
     * start or end. Finally each node's own matches are passed through
     * {@code filterOutOverlappingOffsets()}.
     *
     * @param nGramIndexWorker the index worker to search with
     * @param documentSet the documents to search
     * @param list QNames restricting the search, or {@code null}
     * @param str the query string
     * @param nodeSet the context node set, may be {@code null}
     * @param i the axis code forwarded to the query evaluation
     * @return the processed match set
     * @throws XPathException if the query cannot be parsed or evaluated
     */
    private NodeSet processMatches(NGramIndexWorker nGramIndexWorker, DocumentSet documentSet, List<QName> list, String str, NodeSet nodeSet, int i) throws XPathException {
        final String functionName = getLocalName();
        final int expressionId = getExpressionId();

        final EvaluatableExpression query = "wildcard-contains".equals(functionName)
                ? parseQuery(str)
                : new FixedString(this, str);
        // Parameterized logging: the expression is only stringified when debug is enabled.
        LOG.debug("Parsed Query: {}", query);

        NodeSet matches = query.eval(nGramIndexWorker, documentSet, list, nodeSet, i, expressionId);

        // Literal function names rather than constants borrowed from an unrelated XPath library.
        if (functionName.startsWith("starts-with")) {
            matches = NodeSets.getNodesMatchingAtStart(matches, expressionId);
        } else if (functionName.startsWith("ends-with")) {
            matches = NodeSets.getNodesMatchingAtEnd(matches, expressionId);
        }

        return NodeSets.transformNodes(matches, proxy ->
                NodeProxies.transformOwnMatches(proxy, match -> match.filterOutOverlappingOffsets(), expressionId));
    }

    /** Compiled form of {@link #INTERVAL_QUALIFIER_PATTERN}; compiled once instead of per token. */
    private static final Pattern INTERVAL_QUALIFIER = Pattern.compile(INTERVAL_QUALIFIER_PATTERN);

    /**
     * Turns a wildcard query string into an evaluatable expression.
     * <p>
     * The string is tokenized first. A leading {@code ^} token becomes a start anchor and a
     * trailing {@code $} token an end anchor. Of the remaining tokens, those starting with
     * {@code .} become wildcards, those starting with {@code [} become a set of alternative
     * single characters, and everything else becomes an (unescaped) fixed string. If no tokens
     * remain besides the anchors, an empty expression is returned.
     *
     * @param str the wildcard query string
     * @return the parsed expression
     * @throws XPathException if the query string violates the wildcard syntax
     */
    private EvaluatableExpression parseQuery(String str) throws XPathException {
        final List<String> tokens = tokenizeQuery(str);
        LOG.trace("Tokenized query: {}", tokens);
        if (tokens.isEmpty()) {
            return new EmptyExpression();
        }

        // Track the anchors with indices instead of removing tokens from the list.
        int from = 0;
        int to = tokens.size();
        final boolean anchoredAtStart = tokens.get(0).equals("^");
        if (anchoredAtStart) {
            from++;
        }
        if (from == to) {
            return new EmptyExpression();
        }
        final boolean anchoredAtEnd = tokens.get(to - 1).equals("$");
        if (anchoredAtEnd) {
            to--;
        }
        if (from == to) {
            return new EmptyExpression();
        }

        // NOTE(review): left raw because the common supertype of the expression parts is not
        // imported in this file; WildcardedExpressionSequence accepts the list as-is.
        final ArrayList parts = new ArrayList();
        if (anchoredAtStart) {
            parts.add(new StartAnchor());
        }
        for (final String token : tokens.subList(from, to)) {
            if (token.startsWith(".")) {
                parts.add(parseWildcard(token, str));
            } else if (token.startsWith("[")) {
                parts.add(new AlternativeStrings(this, parseCharacterClass(token)));
            } else {
                parts.add(new FixedString(this, unescape(token)));
            }
        }
        if (anchoredAtEnd) {
            parts.add(new EndAnchor());
        }
        return new WildcardedExpressionSequence(parts);
    }

    /**
     * Converts a wildcard token ({@code .}, {@code .?}, {@code .*}, {@code .+} or
     * {@code .{min,max}}) into a {@link Wildcard} with the corresponding bounds.
     *
     * @param token the wildcard token, starting with a full stop
     * @param query the complete query string, attached to the error for context
     * @throws XPathException if the qualifier is malformed or a bound does not fit an int
     */
    private Wildcard parseWildcard(String token, String query) throws XPathException {
        if (token.length() == 1) {
            return new Wildcard(1, 1);
        }
        final String qualifier = token.substring(1);
        switch (qualifier) {
            case "?":
                return new Wildcard(0, 1);
            case "*":
                return new Wildcard(0, Integer.MAX_VALUE);
            case "+":
                return new Wildcard(1, Integer.MAX_VALUE);
            default:
                break;
        }
        final Matcher interval = INTERVAL_QUALIFIER.matcher(qualifier);
        if (!interval.matches()) {
            throw new XPathException(this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax", new StringValue(query));
        }
        try {
            return new Wildcard(Integer.parseInt(interval.group(1)), Integer.parseInt(interval.group(2)));
        } catch (NumberFormatException e) {
            // The digits matched the pattern but overflow an int.
            throw new XPathException(this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax", new StringValue(query), e);
        }
    }

    /**
     * Collects the members of a bracketed character class token such as {@code [abc]}.
     * Iterates by Unicode code point so that supplementary characters stay intact instead of
     * being split into two lone surrogates.
     *
     * @param token the token including its enclosing brackets
     * @return one single-character string per distinct member
     */
    private static HashSet<String> parseCharacterClass(String token) {
        final HashSet<String> members = new HashSet<>(token.length() - 2);
        final int end = token.length() - 1;
        int pos = 1;
        while (pos < end) {
            final int codePoint = token.codePointAt(pos);
            members.add(new String(Character.toChars(codePoint)));
            pos += Character.charCount(codePoint);
        }
        return members;
    }

    /**
     * A backslash followed by any single character. DOTALL is required so that an escaped
     * line terminator is matched as well; without it the backslash would be left in place.
     */
    private static final Pattern ESCAPE_SEQUENCE = Pattern.compile("\\\\(.)", Pattern.DOTALL);

    /**
     * Removes one level of backslash escaping: every backslash followed by a character is
     * replaced by that character alone.
     *
     * @param str the escaped string
     * @return the string with escape backslashes removed
     */
    private static String unescape(String str) {
        return ESCAPE_SEQUENCE.matcher(str).replaceAll("$1");
    }

    /**
     * Splits a wildcard query string into tokens.
     * <p>
     * Produced tokens are:
     * <ul>
     *   <li>a wildcard: {@code .} optionally followed by {@code ?}, {@code *}, {@code +} or an
     *       interval qualifier {@code {min,max}};</li>
     *   <li>a character class: {@code [} up to the next {@code ]} found at least two positions
     *       later (so {@code ]} may be the first member);</li>
     *   <li>{@code ^} and {@code $}, each as a token of its own wherever they occur;</li>
     *   <li>literal text, in which backslash escapes are kept as two-character sequences.</li>
     * </ul>
     *
     * @param str the query string
     * @return the tokens in order of appearance
     * @throws XPathException if the string ends in a lone backslash, a {@code [} or
     *                        <code>.{</code> is not closed, or an interval qualifier is malformed
     */
    private static List<String> tokenizeQuery(String str) throws XPathException {
        final List<String> tokens = new ArrayList<>();
        final StringBuilder literal = new StringBuilder();
        final int length = str.length();

        int pos = 0;
        while (pos < length) {
            final char current = str.charAt(pos);
            switch (current) {
                case '\\':
                    if (pos + 1 >= length) {
                        throw new XPathException("err:FTDY0020: query string is terminated by an unescaped backslash");
                    }
                    // Keep the backslash together with the escaped character; unescaping happens later.
                    literal.append(str, pos, pos + 2);
                    pos++;
                    break;

                case '.': {
                    flushLiteral(literal, tokens);
                    int end = pos;
                    if (pos + 1 < length) {
                        final char qualifier = str.charAt(pos + 1);
                        if (qualifier == '?' || qualifier == '*' || qualifier == '+') {
                            end = pos + 1;
                        } else if (qualifier == '{') {
                            end = str.indexOf('}', pos + 2);
                            if (end == -1) {
                                throw new XPathException("err:FTDY0020: query string violates wildcard syntax: Unmatched qualifier start { in query string; marked by <-- HERE in \"" + str.substring(0, pos + 2) + " <-- HERE " + str.substring(pos + 2) + "\"");
                            }
                            if (!str.substring(pos + 1, end + 1).matches(INTERVAL_QUALIFIER_PATTERN)) {
                                throw new XPathException("err:FTDY0020: query string violates wildcard qualifier syntax;  marked by <-- HERE in \"" + str.substring(0, end + 1) + " <-- HERE " + str.substring(end + 1) + "\"");
                            }
                        }
                    }
                    tokens.add(str.substring(pos, end + 1));
                    pos = end;
                    break;
                }

                case '[': {
                    // Start looking two positions on: the class cannot be empty, so a ']' directly
                    // after '[' is a member, not the terminator.
                    final int close = str.indexOf(']', pos + 2);
                    if (close == -1) {
                        throw new XPathException("err:FTDY0020: query string violates wildcard syntax: Unmatched [ in query string; marked by <-- HERE in \"" + str.substring(0, pos + 1) + " <-- HERE " + str.substring(pos + 1) + "\"");
                    }
                    flushLiteral(literal, tokens);
                    tokens.add(str.substring(pos, close + 1));
                    pos = close;
                    break;
                }

                case '^':
                case '$':
                    flushLiteral(literal, tokens);
                    tokens.add(String.valueOf(current));
                    break;

                default:
                    literal.append(current);
                    break;
            }
            pos++;
        }
        flushLiteral(literal, tokens);
        return tokens;
    }

    /** Emits the pending literal text (if any) as a token and resets the buffer. */
    private static void flushLiteral(StringBuilder literal, List<String> tokens) {
        if (literal.length() > 0) {
            tokens.add(literal.toString());
            literal.setLength(0);
        }
    }

    /**
     * Searches the index for a fixed string by looking up its n-grams one after another and
     * keeping only nodes in which each n-gram's match continues the match found so far.
     * <p>
     * The string is cut into consecutive n-grams of the index's n-gram size. A final n-gram
     * that is shorter than that size is looked up in a "filled" form, prefixed with the
     * trailing code points of the preceding n-gram.
     *
     * @param nGramIndexWorker the index worker to search with
     * @param documentSet the documents to search
     * @param list QNames restricting the search, or {@code null}
     * @param str the string to search for
     * @param nodeSet the context node set, may be {@code null}
     * @param i the axis code forwarded to the index search
     * @return the nodes containing the string; an empty node set if the string yields no n-grams
     * @throws XPathException if the index search fails
     */
    public NodeSet fixedStringSearch(NGramIndexWorker nGramIndexWorker, DocumentSet documentSet, List<QName> list, String str, NodeSet nodeSet, int i) throws XPathException {
        final int gramSize = nGramIndexWorker.getN();
        final String[] ngrams = getDistinctNGrams(str, gramSize);
        if (ngrams.length == 0) {
            return new EmptyNodeSet();
        }

        final String firstGram = ngrams[0];
        LOG.trace("First NGRAM: {}", firstGram);
        NodeSet result = nGramIndexWorker.search(getExpressionId(), documentSet, list, firstGram, firstGram, this.context, nodeSet, i);

        for (int index = 1; index < ngrams.length; index++) {
            final String gram = ngrams[index];
            final int gramLength = gram.codePointCount(0, gram.length());
            final int missing = gramSize - gramLength;

            String lookupKey = gram;
            if (missing > 0) {
                // Short trailing n-gram: borrow the last code points of the previous n-gram so the
                // lookup key has full n-gram length.
                final String previousGram = ngrams[index - 1];
                final StringBuilder filled = new StringBuilder();
                int offset = previousGram.offsetByCodePoints(0, gramLength);
                for (int k = 0; k < missing; k++) {
                    final int codePoint = previousGram.codePointAt(offset);
                    offset += Character.charCount(codePoint);
                    filled.appendCodePoint(codePoint);
                }
                filled.append(gram);
                lookupKey = filled.toString();
                LOG.debug("Filled: {}", lookupKey);
            }

            final NodeSet current = nGramIndexWorker.search(getExpressionId(), documentSet, list, lookupKey, gram, this.context, nodeSet, i);
            final NodeSet soFar = result;
            // For every node hit by the current n-gram, look up the same node in the matches
            // accumulated so far (head) and keep it only if the current match (tail) continues it.
            // Distinct parameter names are essential here: head comes from the earlier result,
            // tail from the current search.
            result = NodeSets.transformNodes(current, tail -> {
                final NodeProxy head = soFar.get(tail);
                return head == null ? null : getContinuousMatches(head, tail);
            });
        }
        return result;
    }

    /**
     * Looks for the first pair of matches (one from each node) where the first node's match is
     * continued by the second node's match, and records the combined match on the second node.
     * <p>
     * Both searches stop as soon as {@code continuedBy} yields a non-null result. On success,
     * only the matches on {@code nodeProxy2} for which the filter predicate holds (context id
     * different from this expression's id) are retained - presumably dropping this
     * expression's own earlier matches; confirm against {@code NodeProxies.filterMatches} -
     * and the combined match is added.
     *
     * @param nodeProxy the node carrying the matches found so far (head)
     * @param nodeProxy2 the node carrying the matches of the next n-gram (tail)
     * @return {@code nodeProxy2} with the combined match added, or {@code null} if no match of
     *         {@code nodeProxy} is continued by a match of {@code nodeProxy2}
     */
    private NodeProxy getContinuousMatches(NodeProxy nodeProxy, NodeProxy nodeProxy2) {
        Match continuous = null;
        // Both loops terminate on exhaustion of their match chain or on the first continuation found.
        for (Match head = nodeProxy.getMatches(); head != null && continuous == null; head = head.getNextMatch()) {
            for (Match tail = nodeProxy2.getMatches(); tail != null && continuous == null; tail = tail.getNextMatch()) {
                continuous = head.continuedBy(tail);
            }
        }
        if (continuous == null) {
            return null;
        }
        NodeProxies.filterMatches(nodeProxy2, candidate -> candidate.getContextId() != getExpressionId());
        nodeProxy2.addMatch(continuous);
        return nodeProxy2;
    }

    /**
     * Computes the dependency flags of this expression from its node argument.
     *
     * @return 1 when the node argument's static type is a subtype of type code -1 and the
     *         argument does not depend on flag 2; otherwise 3
     */
    @Override
    public int getDependencies() {
        final Expression nodesArg = getArgument(0);
        // NOTE(review): -1 is presumably Type.NODE, 2 Dependency.CONTEXT_ITEM and 1
        // Dependency.CONTEXT_SET - confirm.
        final boolean yieldsNodes = Type.subTypeOf(nodesArg.returnsType(), -1);
        if (yieldsNodes && !Dependency.dependsOn(nodesArg, 2)) {
            return 1;
        }
        return 3;
    }

    /**
     * @return the static return type code of this function, always -1
     *         (NOTE(review): presumably Type.NODE - confirm)
     */
    @Override // org.exist.xquery.Function, org.exist.xquery.PathExpr, org.exist.xquery.AbstractExpression, org.exist.xquery.Expression
    public int returnsType() {
        return -1;
    }

    /**
     * Cuts a string into consecutive, non-overlapping, lower-cased n-grams.
     * <p>
     * Lengths are counted in Unicode code points. Every n-gram has {@code i} code points except
     * possibly the last one, which holds the remainder. Note that, despite the name, repeated
     * n-grams are not removed.
     *
     * @param str the string to split
     * @param i the n-gram size in code points; must be positive
     * @return the n-grams in order; an empty array for an empty string
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    private static String[] getDistinctNGrams(String str, int i) {
        if (i <= 0) {
            throw new IllegalArgumentException("n-gram size must be positive: " + i);
        }
        final int totalCodePoints = str.codePointCount(0, str.length());
        final int gramCount = totalCodePoints / i + (totalCodePoints % i > 0 ? 1 : 0);
        final String[] grams = new String[gramCount];

        int charIndex = 0;
        int remaining = totalCodePoints;
        for (int g = 0; g < gramCount; g++) {
            final int size = Math.min(i, remaining);
            final StringBuilder gram = new StringBuilder(size);
            for (int k = 0; k < size; k++) {
                final int codePoint = str.codePointAt(charIndex);
                // Advance by the width of the code point actually read, not of its lower-case form.
                charIndex += Character.charCount(codePoint);
                gram.appendCodePoint(Character.toLowerCase(codePoint));
            }
            grams[g] = gram.toString();
            remaining -= size;
        }
        return grams;
    }
}
