/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import java.util.ArrayList;
import org.carrot2.attrs.AttrBoolean;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.language.TokenTypeUtils;
import org.carrot2.text.preprocessing.LabelFilterProcessor;
import org.carrot2.text.preprocessing.PreprocessingContext;

/**
 * Computes, for every candidate label in a {@link PreprocessingContext}, the set of documents
 * the label applies to, and optionally discards labels that cover too few documents.
 *
 * <p>Results are written back to {@code context.allLabels}: {@code documentIndices} is always
 * replaced, and {@code featureIndex} is replaced only when size filtering is active.
 */
public class DocumentAssigner extends AttrComposite {
    /**
     * Controls how phrase labels are matched. When {@code true}, a phrase label receives the
     * documents recorded for the phrase itself. When {@code false}, it receives the documents
     * shared by the stems of all of the phrase's non-common words. Defaults to {@code false}.
     */
    public AttrBoolean exactPhraseAssignment =
        this.attributes.register(
            "exactPhraseAssignment",
            AttrBoolean.builder().label("Exact phrase assignment").defaultValue(false));

    /**
     * Minimum number of documents a label must cover to be retained. Registered with the
     * range 1..100 and a default of 2. Filtering only happens for values above 1.
     */
    public AttrInteger minClusterSize =
        this.attributes.register(
            "minClusterSize",
            AttrInteger.builder().label("Minimum cluster size").min(1).max(100).defaultValue(2));

    /**
     * Builds one document bit set per label and stores the result in {@code context.allLabels}.
     *
     * <p>A label whose feature index is below the number of words is treated as a single word
     * and gets the documents of that word's stem. Any other label is treated as a phrase whose
     * index is the feature index minus the word count.
     *
     * <p>When {@link #minClusterSize} is greater than 1, labels covering fewer documents are
     * dropped, both {@code featureIndex} and {@code documentIndices} are rewritten, and
     * {@link LabelFilterProcessor#updateFirstPhraseIndex(PreprocessingContext)} is invoked.
     * Otherwise only {@code documentIndices} is set.
     *
     * @param context preprocessing state that is read and updated in place
     */
    void assign(PreprocessingContext context) {
        int[] featureOfLabel = context.allLabels.featureIndex;
        int[][] stemTf = context.allStems.tfByDocument;
        int[] stemOfWord = context.allWords.stemIndex;
        short[] typeOfWord = context.allWords.type;
        int[][] phraseTf = context.allPhrases.tfByDocument;
        int[][] wordsOfPhrase = context.allPhrases.wordIndices;
        int totalWords = stemOfWord.length;
        int totalDocuments = context.documentCount;
        int totalLabels = featureOfLabel.length;

        BitSet[] documentsOfLabel = new BitSet[totalLabels];
        for (int label = 0; label < totalLabels; label++) {
            BitSet documents = new BitSet(totalDocuments);
            int feature = featureOfLabel[label];
            if (feature >= totalWords) {
                // Phrase label: phrase indices follow the word indices in the feature space.
                int phrase = feature - totalWords;
                if (this.exactPhraseAssignment.get()) {
                    markDocuments(documents, phraseTf[phrase]);
                } else {
                    intersectWordDocuments(
                        documents, wordsOfPhrase[phrase], typeOfWord, stemOfWord, stemTf, totalDocuments);
                }
            } else {
                // Single-word label: use the documents of the word's stem.
                markDocuments(documents, stemTf[stemOfWord[feature]]);
            }
            documentsOfLabel[label] = documents;
        }

        int requiredSize = this.minClusterSize.get();
        if (requiredSize <= 1) {
            // No size filtering requested; the label list itself stays untouched.
            context.allLabels.documentIndices = documentsOfLabel;
            return;
        }

        IntArrayList keptFeatures = new IntArrayList(totalLabels);
        ArrayList<BitSet> keptDocuments = new ArrayList<>(totalLabels);
        for (int label = 0; label < totalLabels; label++) {
            BitSet documents = documentsOfLabel[label];
            if (documents.cardinality() >= requiredSize) {
                keptFeatures.add(featureOfLabel[label]);
                keptDocuments.add(documents);
            }
        }
        context.allLabels.documentIndices = keptDocuments.toArray(new BitSet[0]);
        context.allLabels.featureIndex = keptFeatures.toArray();
        LabelFilterProcessor.updateFirstPhraseIndex(context);
    }

    /**
     * Fills {@code documents} with the documents common to every non-common word of a phrase,
     * matching each word through its stem. Words flagged as common by
     * {@link TokenTypeUtils#isCommon} are skipped. If every word is common, {@code documents}
     * is left unchanged.
     *
     * @param documents bit set to fill; expected to be empty on entry
     * @param phraseWords word indices making up the phrase
     * @param typeOfWord token type flags, indexed by word
     * @param stemOfWord stem index, indexed by word
     * @param stemTf per-stem document data, indexed by stem
     * @param totalDocuments sizing hint for temporary bit sets
     */
    private static void intersectWordDocuments(
            BitSet documents,
            int[] phraseWords,
            short[] typeOfWord,
            int[] stemOfWord,
            int[][] stemTf,
            int totalDocuments) {
        boolean seeded = false;
        for (int word : phraseWords) {
            if (!TokenTypeUtils.isCommon(typeOfWord[word])) {
                int[] wordDocuments = stemTf[stemOfWord[word]];
                if (seeded) {
                    // Later words narrow the set down to the documents they also occur in.
                    BitSet other = new BitSet(totalDocuments);
                    markDocuments(other, wordDocuments);
                    documents.and(other);
                } else {
                    // The first non-common word provides the initial set.
                    markDocuments(documents, wordDocuments);
                    seeded = true;
                }
            }
        }
    }

    /**
     * Sets in {@code target} the bit for each value found at an even position of
     * {@code tfByDocument}. Odd positions (presumably the matching term frequencies) are
     * ignored, as is a trailing unpaired element.
     *
     * @param target bit set receiving the document indices
     * @param tfByDocument array read two entries at a time, document index first
     */
    private static void markDocuments(BitSet target, int[] tfByDocument) {
        for (int position = 0; position + 1 < tfByDocument.length; position += 2) {
            target.set(tfByDocument[position]);
        }
    }
}

