/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.alignment.pairwise;

import edu.msu.cme.rdp.alignment.AlignmentMode;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAligner;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAlignment;
import edu.msu.cme.rdp.alignment.pairwise.ScoringMatrix;
import edu.msu.cme.rdp.alignment.pairwise.rna.DistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.IdentityDistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.JukesCantorModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.Kimura2ParameterModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.OverlapCheckFailedException;
import edu.msu.cme.rdp.alignment.pairwise.rna.UncorrectedDistanceModel;
import edu.msu.cme.rdp.readseq.SequenceType;
import edu.msu.cme.rdp.readseq.readers.Sequence;
import edu.msu.cme.rdp.readseq.readers.SequenceReader;
import edu.msu.cme.rdp.readseq.utils.IUBUtilities;
import edu.msu.cme.rdp.readseq.utils.SeqUtils;
import edu.msu.cme.rdp.readseq.utils.kmermatch.KmerMatchCore;
import edu.msu.cme.rdp.readseq.utils.kmermatch.NuclSeqMatch;
import edu.msu.cme.rdp.readseq.utils.kmermatch.ProteinSeqMatch;
import edu.msu.cme.rdp.readseq.utils.orientation.OrientationChecker;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;

public class PairwiseKNN {
    /** Reference (database) sequence file that was loaded at construction time. */
    private final File refFile;
    /** Number of distinct identity levels kept by {@code getKNN}. */
    private final int k;
    /** Number of k-mer prefilter candidates per query; 0 makes {@code findMatch} align against every reference. */
    private final int prefilter;
    /** K-mer word size for the prefilter; a value of 0 is replaced by a type-specific default in the constructor. */
    private int wordSize;
    /** Gene name; orientation checking is only performed when it equals {@link #Gene_16S}. */
    private final String gene;
    /** Alignment mode passed to every pairwise alignment. */
    private final AlignmentMode mode;
    /** Distance model whose distance is turned into an identity (1 - distance). */
    private final DistanceModel distModel;
    /** Reference sequences keyed by sequence name. */
    private final HashMap<String, Sequence> dbSeqsMap = new HashMap<String, Sequence>();
    /** Scoring matrix selected from the reference sequence type. */
    private final ScoringMatrix matrix;
    /** K-mer matcher used by the prefilter; only created when {@code prefilter > 0}. */
    private KmerMatchCore kerMatchCore;
    /** Format used when printing the identity column (three decimal places). */
    private static final String dformat = "%1$.3f";
    /** Ordering applied to the neighbors collected by {@code getKNN}. */
    private final Comparator comparator;
    /** Sequence type guessed from the reference file. */
    private final SequenceType refSeqType;
    /** Gene name that enables orientation checking. */
    public static final String Gene_16S = "16S";
    /** Command-line name of the Jukes-Cantor distance model. */
    public static final String jukescantor = "jukescantor";
    /** Command-line name of the Kimura two-parameter distance model. */
    public static final String kimura = "kimura";
    /** Command-line name of the uncorrected distance model. */
    public static final String uncorrected = "uncorrected";
    /** Command-line name of the identity distance model. */
    public static final String identity = "identity";

    public PairwiseKNN(File refFile, String gene, AlignmentMode mode, DistanceModel dist, int k, int ws, int prefilter, Comparator c) throws IOException {
        this.refFile = refFile;
        this.gene = gene;
        this.mode = mode;
        this.k = k;
        this.prefilter = prefilter;
        this.wordSize = ws;
        this.comparator = c;
        this.distModel = dist;
        this.refSeqType = SeqUtils.guessSequenceType((File)refFile);
        this.parseRefSeq(refFile);
        if (this.refSeqType == SequenceType.Protein) {
            this.matrix = ScoringMatrix.getDefaultProteinMatrix();
            if (this.wordSize == 0) {
                this.wordSize = 4;
            }
            if (prefilter > 0) {
                this.kerMatchCore = new ProteinSeqMatch(new ArrayList<Sequence>(this.dbSeqsMap.values()), this.wordSize);
            }
        } else {
            this.matrix = ScoringMatrix.getDefaultNuclMatrix();
            if (this.wordSize == 0) {
                this.wordSize = 8;
            }
            if (prefilter > 0) {
                this.kerMatchCore = new NuclSeqMatch(new ArrayList<Sequence>(this.dbSeqsMap.values()), this.wordSize);
            }
        }
    }

    /**
     * Convenience constructor that uses {@link #Gene_16S} as the gene and
     * otherwise delegates to the full constructor.
     *
     * @throws IOException if the reference file cannot be read
     */
    public PairwiseKNN(File refFile, AlignmentMode mode, DistanceModel dist, int k, int ws, int prefilter, Comparator c) throws IOException {
        this(refFile, Gene_16S, mode, dist, k, ws, prefilter, c);
    }

    /**
     * Reads every sequence from {@code file} into {@code dbSeqsMap}, keyed by
     * sequence name. A later sequence with the same name replaces an earlier one.
     *
     * @param file reference sequence file to load
     * @throws IOException if reading the file fails
     */
    private synchronized void parseRefSeq(File file) throws IOException {
        SequenceReader reader = new SequenceReader(file);
        try {
            Sequence seq;
            while ((seq = reader.readNextSequence()) != null) {
                this.dbSeqsMap.put(seq.getSeqName(), seq);
            }
        } finally {
            // Release the reader even when a read fails part-way through the file.
            reader.close();
        }
    }

    /**
     * Looks up a loaded reference sequence by name.
     *
     * @param seqName name of the reference sequence
     * @return the sequence, or {@code null} if no reference with that name was loaded
     */
    public Sequence getRefSeq(String seqName) {
        return this.dbSeqsMap.get(seqName);
    }

    /**
     * @return the name (without directory) of the reference file
     */
    public String getRefFilename() {
        return this.refFile.getName();
    }

    /**
     * @return the {@code k} value this instance was constructed with
     */
    public int getK() {
        return this.k;
    }

    /**
     * Aligns the query against every sequence in {@code refList} and returns the
     * best neighbors in the order defined by this instance's comparator.
     *
     * <p>Neighbors are taken from the ordered results until more than {@code k}
     * distinct (strictly decreasing) identity values have been seen, so neighbors
     * that tie on identity are all returned and the list may hold more than
     * {@code k} entries.
     *
     * @param seq           query sequence
     * @param refList       reference sequences to align against
     * @param removeBaseN   when true the query is upper-cased and every {@code N} is removed
     *                      before aligning
     * @param isSeqReversed whether {@code seq} has already been reverse-complemented by the caller;
     *                      used to report the orientation relative to the original query
     * @param checkReverse  when true and the references are nucleotide, the reverse complement of
     *                      the query is aligned as well and kept if it scores strictly higher
     * @return the selected neighbors, best first according to the comparator
     * @throws IOException                 declared for callers; propagated from the alignment helpers
     * @throws OverlapCheckFailedException propagated from the distance model
     */
    public List<Neighbor> getKNN(Sequence seq, Collection<Sequence> refList, boolean removeBaseN, boolean isSeqReversed, boolean checkReverse) throws IOException, OverlapCheckFailedException {
        if (removeBaseN) {
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale
            // (e.g. a Turkish locale would otherwise map 'i' to a dotted capital I).
            String withoutN = seq.getSeqString().toUpperCase(Locale.ROOT).replace("N", "");
            seq = new Sequence(seq.getSeqName(), seq.getDesc(), withoutN);
        }

        final String query = seq.getSeqString();
        final boolean nucleotideRefs = this.refSeqType == SequenceType.Nucleotide;
        final boolean tryReverse = nucleotideRefs && checkReverse;
        // Computed lazily and reused: the reverse complement does not depend on the reference.
        String reverseQuery = null;

        TreeSet<Neighbor> orderedResults = new TreeSet<Neighbor>(this.comparator);
        for (Sequence dbSeq : refList) {
            Neighbor n = new Neighbor();
            n.dbSeq = dbSeq;
            n.alignment = PairwiseAligner.align(dbSeq.getSeqString(), query, this.matrix, this.mode);
            n.reverse = isSeqReversed;

            if (tryReverse) {
                if (reverseQuery == null) {
                    reverseQuery = IUBUtilities.reverseComplement(query);
                }
                PairwiseAlignment rc = PairwiseAligner.align(dbSeq.getSeqString(), reverseQuery, this.matrix, this.mode);
                // The forward alignment wins ties.
                if (rc.getScore() > n.alignment.getScore()) {
                    n.alignment = rc;
                    n.reverse = !isSeqReversed;
                }
            }

            // Identity is the complement of the model distance between the two aligned strings.
            double distance;
            if (nucleotideRefs) {
                distance = this.distModel.getDistance(SeqUtils.toBytes(n.alignment.getAlignedSeqi()), SeqUtils.toBytes(n.alignment.getAlignedSeqj()), 0);
            } else {
                distance = this.distModel.getDistance(n.alignment.getAlignedSeqi().getBytes(), n.alignment.getAlignedSeqj().getBytes(), 0);
            }
            n.alignment.setIdent(1.0 - distance);
            orderedResults.add(n);
        }

        ArrayList<Neighbor> ret = new ArrayList<Neighbor>();
        int uniqk = 0;
        // Starts above any identity (identity is 1 - distance) so the first neighbor opens level 1.
        double prevIdent = 100.0;
        for (Neighbor h : orderedResults) {
            if (h.alignment.getIdent() < prevIdent) {
                ++uniqk;
                prevIdent = h.alignment.getIdent();
            }
            if (uniqk > this.k) {
                break;
            }
            ret.add(h);
        }
        return ret;
    }

    /**
     * Finds the nearest reference neighbors of a query sequence.
     *
     * <p>For the {@link #Gene_16S} gene the orientation checker is consulted first and a
     * reversed query is reverse-complemented before any matching. Without a prefilter the
     * query is aligned against every loaded reference; otherwise only the top k-mer
     * candidates are aligned. For genes other than 16S the k-mer candidates of the
     * reverse-complemented query are added as well and both orientations are aligned.
     *
     * @param seq         query sequence
     * @param removeBaseN passed through to {@code getKNN}
     * @return the neighbors selected by {@code getKNN}
     * @throws IOException                 propagated from matching/alignment
     * @throws OverlapCheckFailedException propagated from the distance model
     */
    public List<Neighbor> findMatch(Sequence seq, boolean removeBaseN) throws IOException, OverlapCheckFailedException {
        final boolean is16S = this.gene.equals(Gene_16S);
        boolean isReversed = false;
        if (is16S) {
            isReversed = OrientationChecker.getChecker().isSeqReversed(seq.getSeqString());
            if (isReversed) {
                seq = new Sequence(seq.getSeqName(), seq.getDesc(), IUBUtilities.reverseComplement(seq.getSeqString()));
            }
        }

        // The constructor only builds the k-mer matcher for prefilter > 0, so any
        // non-positive value must take the exhaustive path instead of dereferencing null.
        if (this.prefilter <= 0) {
            return this.getKNN(seq, this.dbSeqsMap.values(), removeBaseN, isReversed, true);
        }

        ArrayList<Sequence> refList = new ArrayList<Sequence>();
        for (KmerMatchCore.BestMatch bestTarget : this.kerMatchCore.findTopKMatch(seq, this.prefilter)) {
            refList.add(bestTarget.getBestMatch());
        }
        if (is16S) {
            // Orientation was already settled above, so only the forward alignment is needed.
            return this.getKNN(seq, refList, removeBaseN, isReversed, false);
        }

        // NOTE(review): this reverse-complement is also taken for protein references; confirm
        // that IUBUtilities.reverseComplement is meaningful/safe for amino-acid strings.
        Sequence revSeq = new Sequence(seq.getSeqName(), seq.getDesc(), IUBUtilities.reverseComplement(seq.getSeqString()));
        for (KmerMatchCore.BestMatch bestTarget : this.kerMatchCore.findTopKMatch(revSeq, this.prefilter)) {
            refList.add(bestTarget.getBestMatch());
        }
        return this.getKNN(seq, refList, removeBaseN, isReversed, true);
    }

    /**
     * Writes one three-line record per neighbor: a tab-separated header line that
     * starts with {@code @}, followed by the aligned query and the aligned reference,
     * each prefixed with {@code >}.
     *
     * <p>The method is synchronized on this instance, so records written by
     * different threads through the same instance are not interleaved.
     *
     * @param seq        the query sequence
     * @param alignments neighbors to print, in rank order (rank starts at 1)
     * @param out        destination stream
     */
    private synchronized void printAlignment(Sequence seq, List<Neighbor> alignments, PrintStream out) throws IOException {
        int rank = 0;
        for (Neighbor neighbor : alignments) {
            ++rank;
            PairwiseAlignment aln = neighbor.alignment;

            // Query-side columns: name, rank, orientation, score, identity, start, end, length.
            String queryColumns = "@" + seq.getSeqName()
                    + "\t" + rank
                    + "\t" + (neighbor.reverse ? "-" : "+")
                    + "\t" + aln.getScore()
                    + "\t" + String.format(dformat, aln.getIdent())
                    + "\t" + aln.getStartj()
                    + "\t" + aln.getEndj()
                    + "\t" + seq.getSeqString().length();

            // Reference-side columns: start, end, id, description.
            String refColumns = "\t" + aln.getStarti()
                    + "\t" + aln.getEndi()
                    + "\t" + neighbor.dbSeq.getSeqName()
                    + "\t" + neighbor.dbSeq.getDesc();

            out.println(queryColumns + refColumns);
            out.println(">" + aln.getAlignedSeqj());
            out.println(">" + aln.getAlignedSeqi());
        }
    }

    /**
     * Command-line entry point: {@code PairwiseKNN <options> <queryFile> <dbFile>}.
     *
     * <p>Parses the options, loads the reference file, then aligns every query
     * sequence of at least 8 characters on a fixed-size thread pool and prints the
     * neighbors. Any problem with the command line prints the usage text and an
     * error message and returns without processing.
     *
     * @param args command-line arguments
     * @throws Exception if the files cannot be read, the query and reference types differ,
     *                   or the thread is interrupted while waiting for tasks
     */
    public static void main(String[] args) throws Exception {
        final File refFile;
        final File queryFile;
        final boolean removeBaseN;
        final PrintStream out;
        final int maxThreads;
        // Upper bound on submitted-but-unfinished tasks, so a huge query file is not
        // queued in memory all at once.
        final int maxTasks = 1000;
        AlignmentMode mode = AlignmentMode.glocal;
        IdentityDistanceModel protDistmodel = new IdentityDistanceModel();
        DistanceModel dnaDistmodel = new UncorrectedDistanceModel();
        int k = 1;
        int wordSize = 0;
        int prefilter = 10;
        IdentityComparator comparator = new IdentityComparator();
        String gene = Gene_16S;
        Options options = new Options();
        options.addOption("g", "gene", true, "Only fixed the orientation for 16S gene.(default is 16S)");
        options.addOption("m", "mode", true, "Alignment mode {global, glocal, local, overlap, overlap_trim} (default= glocal)");
        options.addOption("k", true, "K-nearest neighbors to return. (default = 1)");
        options.addOption("o", "out", true, "Redirect output to file instead of stdout");
        options.addOption("p", "prefilter", true, "The top p closest targets from kmer prefilter step. Set p=0 to disable the prefilter step. (default = 10) ");
        options.addOption("w", "word-size", true, "The word size used to find closest targets during prefilter. (default 4 for protein, 8 for nucleotide)");
        options.addOption("n", false, "Remove Ns from the query. Default is false");
        options.addOption("t", "threads", true, "#Threads to use. This process is CPU intensive. (default 1)");
        options.addOption("d", "distanceModel", true, "DNA distance model, jukescantor, kimura, uncorrected, identity. identity will count Ns as mismatches. (default uncorrected)");
        try {
            CommandLine line = new PosixParser().parse(options, args);
            if (line.hasOption("threads")) {
                maxThreads = Integer.parseInt(line.getOptionValue("threads"));
                // A pool cannot be created with fewer than one thread; report it as a usage error.
                if (maxThreads < 1) {
                    throw new IllegalArgumentException("threads must be at least 1");
                }
                if (maxThreads >= Runtime.getRuntime().availableProcessors()) {
                    System.err.println(" Runtime.getRuntime().availableProcessors() " + Runtime.getRuntime().availableProcessors());
                }
            } else {
                maxThreads = 1;
            }
            if (line.hasOption("gene")) {
                gene = line.getOptionValue("gene");
            }
            if (line.hasOption("mode")) {
                mode = AlignmentMode.valueOf(line.getOptionValue("mode"));
            }
            if (line.hasOption("distanceModel")) {
                String distStr = line.getOptionValue("distanceModel");
                if (distStr.equals(jukescantor)) {
                    dnaDistmodel = new JukesCantorModel();
                } else if (distStr.equals(uncorrected)) {
                    dnaDistmodel = new UncorrectedDistanceModel();
                } else if (distStr.equals(kimura)) {
                    dnaDistmodel = new Kimura2ParameterModel();
                } else if (distStr.equals(identity)) {
                    dnaDistmodel = new IdentityDistanceModel(true);
                } else {
                    throw new IllegalArgumentException(distStr + " is not available. distance model must be jukescantor, kimura, uncorrected or identity.");
                }
            }
            if (line.hasOption('k')) {
                k = Integer.parseInt(line.getOptionValue('k'));
                if (k < 1) {
                    throw new IllegalArgumentException("k must be at least 1");
                }
            }
            if (line.hasOption("word-size")) {
                wordSize = Integer.parseInt(line.getOptionValue("word-size"));
                if (wordSize < 3) {
                    throw new IllegalArgumentException("Word size must be at least 3");
                }
            }
            if (line.hasOption("prefilter")) {
                prefilter = Integer.parseInt(line.getOptionValue("prefilter"));
                if (prefilter > 0 && prefilter < k) {
                    throw new IllegalArgumentException("prefilter must be at least as big as k " + k);
                }
            }
            removeBaseN = line.hasOption('n');
            String[] positional = line.getArgs();
            if (positional.length != 2) {
                throw new IllegalArgumentException("Unexpected number of command line arguments");
            }
            queryFile = new File(positional[0]);
            refFile = new File(positional[1]);
            // Opened last so a bad command line neither creates/truncates the output
            // file nor leaves an open stream behind.
            out = line.hasOption("out") ? new PrintStream(line.getOptionValue("out")) : new PrintStream(System.out);
        }
        catch (Exception e) {
            new HelpFormatter().printHelp("PairwiseKNN <options> <queryFile> <dbFile>", options);
            System.err.println("ERROR: " + e.getMessage());
            return;
        }
        try {
            SequenceType querySeqType = SeqUtils.guessSequenceType(queryFile);
            SequenceType refSeqType = SeqUtils.guessSequenceType(refFile);
            if (querySeqType != refSeqType) {
                throw new RuntimeException("reference seqs and query seqs must be the same type, either protein or nucleotide. ");
            }
            // The 16S orientation check does not apply to protein references.
            if (refSeqType == SequenceType.Protein && gene.equals(Gene_16S)) {
                gene = "NA";
            }
            DistanceModel distModel = refSeqType == SequenceType.Protein ? protDistmodel : dnaDistmodel;
            final PairwiseKNN theObj = new PairwiseKNN(refFile, gene, mode, distModel, k, wordSize, prefilter, comparator);
            out.println("#query file: " + queryFile.getName() + " db file: " + refFile.getName() + " k: " + k + " mode: " + mode + " usePrefilter: " + prefilter);
            out.println("#seqname\tk\torientation\tscore\tident\tquery_start\tquery_end\tquery_length\tref_start\tref_end\tref_seqid\tref_desc");

            // One permit per outstanding task: acquire() blocks the reader instead of
            // spinning, and the permit is returned even when a task fails.
            final Semaphore taskSlots = new Semaphore(maxTasks);
            SequenceReader queryReader = new SequenceReader(queryFile);
            try {
                ExecutorService service = Executors.newFixedThreadPool(maxThreads);
                try {
                    Sequence seq;
                    while ((seq = queryReader.readNextSequence()) != null) {
                        if (seq.getSeqString().length() < 8) {
                            continue;
                        }
                        final Sequence threadSeq = seq;
                        Runnable r = new Runnable(){

                            @Override
                            public void run() {
                                try {
                                    List<Neighbor> alignments = theObj.findMatch(threadSeq, removeBaseN);
                                    theObj.printAlignment(threadSeq, alignments, out);
                                }
                                catch (Exception e) {
                                    e.printStackTrace();
                                }
                                finally {
                                    taskSlots.release();
                                }
                            }
                        };
                        taskSlots.acquire();
                        try {
                            service.submit(r);
                        }
                        catch (RuntimeException e) {
                            // The task will never run, so hand its permit back before failing.
                            taskSlots.release();
                            throw e;
                        }
                    }
                }
                finally {
                    // Always stop the pool so its worker threads cannot keep the JVM alive,
                    // and let queued tasks finish before the output is closed.
                    service.shutdown();
                    service.awaitTermination(1L, TimeUnit.DAYS);
                }
            }
            finally {
                queryReader.close();
            }
        }
        finally {
            out.close();
        }
    }

    /**
     * Orders neighbors best-first: higher identity first, then higher alignment
     * score, then by reference sequence name in descending order.
     *
     * <p>Should an identity ever be NaN, that neighbor is placed after all neighbors
     * with a real identity so the ordering stays consistent; two NaN identities fall
     * through to the score and name tie-breakers.
     */
    public static class IdentityComparator
    implements Comparator<Neighbor> {
        @Override
        public int compare(Neighbor t, Neighbor t1) {
            double ident = t.alignment.getIdent();
            double ident1 = t1.alignment.getIdent();
            boolean nan = Double.isNaN(ident);
            boolean nan1 = Double.isNaN(ident1);
            if (nan != nan1) {
                return nan ? 1 : -1;
            }
            if (!nan && ident != ident1) {
                return ident > ident1 ? -1 : 1;
            }

            // Compare rather than subtract: a difference of two ints can overflow
            // and flip the sign of the result.
            int score = t.alignment.getScore();
            int score1 = t1.alignment.getScore();
            if (score != score1) {
                return score > score1 ? -1 : 1;
            }
            return t1.dbSeq.getSeqName().compareTo(t.dbSeq.getSeqName());
        }
    }

    /**
     * Orders neighbors by alignment score in ascending order; neighbors with the
     * same score compare as equal.
     *
     * <p>NOTE(review): because equal scores compare as 0, a sorted set built with this
     * comparator keeps only one neighbor per score, and the lowest score sorts first —
     * confirm both are intended before using it for nearest-neighbor selection.
     */
    public static class ScoreComparator
    implements Comparator<Neighbor> {
        @Override
        public int compare(Neighbor t, Neighbor t1) {
            // Compare rather than subtract: a difference of two ints can overflow
            // and flip the sign of the result.
            int score = t.alignment.getScore();
            int score1 = t1.alignment.getScore();
            if (score == score1) {
                return 0;
            }
            return score < score1 ? -1 : 1;
        }
    }

    /**
     * One candidate match for a query: the reference sequence, the chosen
     * alignment against it, and the reported orientation of the query.
     */
    public static class Neighbor {
        /** Alignment kept for this reference (forward or reverse-complement, whichever was chosen). */
        PairwiseAlignment alignment;
        /** Orientation flag; printed as "-" when true and "+" when false. */
        boolean reverse;
        /** The reference sequence the query was aligned against. */
        Sequence dbSeq;

        /**
         * @return the orientation flag of this neighbor
         */
        public boolean isReverse() {
            return this.reverse;
        }

        /**
         * @return the alignment between the reference and the query
         */
        public PairwiseAlignment getAlignment() {
            return this.alignment;
        }

        /**
         * @return the reference sequence of this neighbor
         */
        public Sequence getDbSeq() {
            return this.dbSeq;
        }
    }
}

