/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.TrainingSet;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantDatum;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

/**
 * Holds the list of {@link VariantDatum} objects used during variant recalibration together
 * with the registered {@link TrainingSet}s, and provides the operations performed on them:
 * annotation decoding, per-annotation normalization, selection of training subsets and
 * writing of the recalibration table.
 *
 * <p>The data list must be supplied through {@link #setData(ExpandingArrayList)} before any
 * method that reads it is called; no null check is performed.
 */
public class VariantDataManager {
    /** The variants being managed; {@code null} until {@link #setData(ExpandingArrayList)} is called. */
    private ExpandingArrayList<VariantDatum> data = null;
    /** Per-annotation mean computed by {@link #normalizeData()}, indexed like {@link #annotationKeys}. */
    private final double[] meanVector;
    /** Per-annotation standard deviation computed by {@link #normalizeData()} (despite the field name). */
    private final double[] varianceVector;
    /** Annotation keys, in the order used to index {@code VariantDatum.annotations}. */
    public final List<String> annotationKeys;
    private final VariantRecalibratorArgumentCollection VRAC;
    protected static final Logger logger = Logger.getLogger(VariantDataManager.class);
    /** Training sets registered through {@link #addTrainingSet(TrainingSet)}. */
    protected final List<TrainingSet> trainingSets;

    /**
     * Creates a manager for the given annotations.
     *
     * @param annotationKeys annotation keys to track; the list is copied
     * @param VRAC           argument collection supplying the thresholds read by this class
     */
    public VariantDataManager(List<String> annotationKeys, VariantRecalibratorArgumentCollection VRAC) {
        this.annotationKeys = new ArrayList<String>(annotationKeys);
        this.VRAC = VRAC;
        this.meanVector = new double[this.annotationKeys.size()];
        this.varianceVector = new double[this.annotationKeys.size()];
        this.trainingSets = new ArrayList<TrainingSet>();
    }

    /**
     * Replaces the managed data list. The list is stored by reference, not copied.
     *
     * @param data the variants to manage
     */
    public void setData(ExpandingArrayList<VariantDatum> data) {
        this.data = data;
    }

    /**
     * @return the managed data list (the same instance passed to {@link #setData}), or
     *         {@code null} if none has been set
     */
    public ExpandingArrayList<VariantDatum> getData() {
        return this.data;
    }

    /**
     * Normalizes every annotation in place to zero mean and unit standard deviation, using
     * statistics computed over the non-null values of data at training sites, and then flags
     * each datum whose normalized annotations exceed {@code VRAC.STD_THRESHOLD} in absolute
     * value via {@code failingSTDThreshold}.
     *
     * <p>Annotations marked null on a datum are replaced with a draw from the engine's random
     * generator ({@code nextGaussian()}).
     *
     * @throws UserException.BadInput if an annotation has no usable value at any training site
     *         (mean is NaN), or if any annotation has a standard deviation below 1e-6
     */
    public void normalizeData() {
        boolean foundZeroVarianceAnnotation = false;
        for (int iii = 0; iii < this.meanVector.length; ++iii) {
            final double theMean = this.mean(iii);
            final double theSTD = this.standardDeviation(theMean, iii);
            logger.info(this.annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD));
            if (Double.isNaN(theMean)) {
                throw new UserException.BadInput("Values for " + this.annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See http://www.broadinstitute.org/gsa/wiki/index.php/VariantAnnotator");
            }
            if (theSTD < 1.0E-6) {
                // Remember the problem but keep going so that every annotation gets logged;
                // the exception is raised once the loop has finished.
                foundZeroVarianceAnnotation = true;
            }
            this.meanVector[iii] = theMean;
            this.varianceVector[iii] = theSTD;
            for (VariantDatum datum : this.data) {
                if (datum.isNull[iii]) {
                    datum.annotations[iii] = GenomeAnalysisEngine.getRandomGenerator().nextGaussian();
                } else {
                    datum.annotations[iii] = (datum.annotations[iii] - theMean) / theSTD;
                }
            }
        }
        if (foundZeroVarianceAnnotation) {
            throw new UserException.BadInput("Found annotations with zero variance. They must be excluded before proceeding.");
        }
        for (VariantDatum datum : this.data) {
            boolean remove = false;
            for (double val : datum.annotations) {
                if (Math.abs(val) > this.VRAC.STD_THRESHOLD) {
                    remove = true;
                    break;
                }
            }
            datum.failingSTDThreshold = remove;
        }
    }

    /**
     * Registers a training set to be consulted by {@link #parseTrainingSets}.
     *
     * @param trainingSet the set to add
     */
    public void addTrainingSet(TrainingSet trainingSet) {
        this.trainingSets.add(trainingSet);
    }

    /** @return {@code true} if at least one registered set has {@code isTraining} set */
    public boolean checkHasTrainingSet() {
        for (TrainingSet trainingSet : this.trainingSets) {
            if (trainingSet.isTraining) {
                return true;
            }
        }
        return false;
    }

    /** @return {@code true} if at least one registered set has {@code isTruth} set */
    public boolean checkHasTruthSet() {
        for (TrainingSet trainingSet : this.trainingSets) {
            if (trainingSet.isTruth) {
                return true;
            }
        }
        return false;
    }

    /** @return {@code true} if at least one registered set has {@code isKnown} set */
    public boolean checkHasKnownSet() {
        for (TrainingSet trainingSet : this.trainingSets) {
            if (trainingSet.isKnown) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the data that are at a training site, do not fail the standard deviation
     * threshold and whose {@code originalQual} is strictly greater than
     * {@code VRAC.QUAL_THRESHOLD}. Logs a warning when fewer than
     * {@code VRAC.MIN_NUM_BAD_VARIANTS} data are selected.
     *
     * @return a new list holding the selected data
     */
    public ExpandingArrayList<VariantDatum> getTrainingData() {
        ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
        for (VariantDatum datum : this.data) {
            if (datum.atTrainingSite && !datum.failingSTDThreshold && datum.originalQual > this.VRAC.QUAL_THRESHOLD) {
                trainingData.add(datum);
            }
        }
        logger.info("Training with " + trainingData.size() + " variants after standard deviation thresholding.");
        if (trainingData.size() < this.VRAC.MIN_NUM_BAD_VARIANTS) {
            logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable.");
        }
        return trainingData;
    }

    /**
     * Builds the data set for the negative model. Data already flagged
     * {@code atAntiTrainingSite} are taken first; the managed list is then sorted in place
     * (natural ordering of {@link VariantDatum}) and walked from the front, adding further
     * eligible data and marking them {@code atAntiTrainingSite}, until the requested number
     * has been added or the list is exhausted. Data failing the standard deviation threshold
     * or having an infinite LOD are never selected.
     *
     * @param bottomPercentage fraction of the whole data list to add from the front of the sorted list
     * @param minimumNumber    minimum total number of data wanted in the result
     * @return a new list holding the selected data
     * @throws UserException.BadInput if the number of data to add exceeds the size of the data list
     */
    public ExpandingArrayList<VariantDatum> selectWorstVariants(double bottomPercentage, int minimumNumber) {
        ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
        for (VariantDatum datum : this.data) {
            if (datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod)) {
                trainingData.add(datum);
            }
        }
        final int numBadSitesAdded = trainingData.size();
        logger.info("Found " + numBadSitesAdded + " variants overlapping bad sites training tracks.");

        Collections.sort(this.data);
        final int numToAdd = Math.max(minimumNumber - trainingData.size(), Math.round((float) bottomPercentage * (float) this.data.size()));
        if (numToAdd > this.data.size()) {
            throw new UserException.BadInput("Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants argument but this is unsafe.");
        }
        if (numToAdd == minimumNumber - trainingData.size()) {
            logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable.");
            bottomPercentage = (float) numToAdd / (float) this.data.size();
        }

        int index = 0;
        int numAdded = 0;
        while (numAdded < numToAdd && index < this.data.size()) {
            final VariantDatum datum = this.data.get(index++);
            if (datum == null || datum.atAntiTrainingSite || datum.failingSTDThreshold || Double.isInfinite(datum.lod)) {
                continue;
            }
            datum.atAntiTrainingSite = true;
            trainingData.add(datum);
            ++numAdded;
        }

        // index may equal data.size() when the scan ran off the end of the list (or the list
        // is empty); clamp it so that reporting the cutoff cannot throw.
        final double lodCutoff = this.data.isEmpty()
                ? Double.NaN
                : this.data.get(Math.min(index, this.data.size() - 1)).lod;
        logger.info("Additionally training with worst " + String.format("%.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", lodCutoff) + ".");
        return trainingData;
    }

    /**
     * Draws a random sample of the data for plotting. First, {@code numToAdd} draws (capped at
     * the data size) are made with replacement, keeping those that do not fail the standard
     * deviation threshold. Then {@code floor(0.05 * numToAdd)} additional data that are at an
     * anti-training site and do not fail the threshold are drawn by rejection sampling; this
     * second step is skipped, with a warning, when no such datum exists.
     *
     * @param numToAdd number of random draws requested
     * @return a new list holding the sampled data (may contain duplicates)
     */
    public ExpandingArrayList<VariantDatum> getRandomDataForPlotting(int numToAdd) {
        numToAdd = Math.min(numToAdd, this.data.size());
        ExpandingArrayList<VariantDatum> returnData = new ExpandingArrayList<VariantDatum>();
        for (int iii = 0; iii < numToAdd; ++iii) {
            final VariantDatum datum = this.data.get(GenomeAnalysisEngine.getRandomGenerator().nextInt(this.data.size()));
            if (!datum.failingSTDThreshold) {
                returnData.add(datum);
            }
        }

        final double numAntiTrainingWanted = Math.floor(0.05 * (double) numToAdd);
        if (numAntiTrainingWanted > 0.0 && !this.hasPlottableAntiTrainingDatum()) {
            // Rejection sampling below would never terminate without an eligible datum.
            logger.warn("No anti-training variants passing the standard deviation threshold are available for plotting.");
            return returnData;
        }
        int numAntiTrainingAdded = 0;
        while ((double) numAntiTrainingAdded < numAntiTrainingWanted) {
            final VariantDatum datum = this.data.get(GenomeAnalysisEngine.getRandomGenerator().nextInt(this.data.size()));
            if (datum.atAntiTrainingSite && !datum.failingSTDThreshold) {
                returnData.add(datum);
                ++numAntiTrainingAdded;
            }
        }
        return returnData;
    }

    /** Returns whether any datum is at an anti-training site and does not fail the standard deviation threshold. */
    private boolean hasPlottableAntiTrainingDatum() {
        for (VariantDatum datum : this.data) {
            if (datum != null && datum.atAntiTrainingSite && !datum.failingSTDThreshold) {
                return true;
            }
        }
        return false;
    }

    /**
     * Mean of annotation {@code index} over the data at training sites whose value is not null.
     *
     * @return the mean, or NaN when no datum contributes
     */
    private double mean(int index) {
        double sum = 0.0;
        int numNonNull = 0;
        for (VariantDatum datum : this.data) {
            if (datum.atTrainingSite && !datum.isNull[index]) {
                sum += datum.annotations[index];
                ++numNonNull;
            }
        }
        return sum / (double) numNonNull;
    }

    /**
     * Population standard deviation (divides by N) of annotation {@code index} around
     * {@code mean}, over the data at training sites whose value is not null.
     *
     * @return the standard deviation, or NaN when no datum contributes
     */
    private double standardDeviation(double mean, int index) {
        double sum = 0.0;
        int numNonNull = 0;
        for (VariantDatum datum : this.data) {
            if (datum.atTrainingSite && !datum.isNull[index]) {
                final double delta = datum.annotations[index] - mean;
                sum += delta * delta;
                ++numNonNull;
            }
        }
        return Math.sqrt(sum / (double) numNonNull);
    }

    /**
     * Decodes every tracked annotation from {@code vc} and stores the values, together with a
     * parallel null mask, on {@code datum}. A value that decodes to NaN is marked null.
     *
     * @param datum  receives the new {@code annotations} and {@code isNull} arrays
     * @param vc     variant context to read the annotations from
     * @param jitter whether to apply the random jitter described in {@link #decodeAnnotation}
     */
    public void decodeAnnotations(VariantDatum datum, VariantContext vc, boolean jitter) {
        final int numAnnotations = this.annotationKeys.size();
        final double[] annotations = new double[numAnnotations];
        final boolean[] isNull = new boolean[numAnnotations];
        int iii = 0;
        for (String key : this.annotationKeys) {
            annotations[iii] = VariantDataManager.decodeAnnotation(key, vc, jitter);
            isNull[iii] = Double.isNaN(annotations[iii]);
            ++iii;
        }
        datum.annotations = annotations;
        datum.isNull = isNull;
    }

    /**
     * Reads one annotation from {@code vc} as a double.
     *
     * <p>Adjustments applied, in order: an infinite value becomes NaN; with {@code jitter},
     * "HRUN" gets uniform noise in [-0.25, 0.25) added; for indels, "QD" is divided by the
     * absolute length difference between the first alternate allele and the reference when
     * that difference is positive; with {@code jitter}, a "HaplotypeScore" or "FS" value that
     * compares equal to zero (tolerance 1e-4 and 1e-3 respectively) is replaced by uniform
     * noise in [-0.2, 0.2).
     *
     * @return the decoded value, or NaN if the attribute is missing, is not a parsable string,
     *         or any other exception occurs while decoding
     */
    private static double decodeAnnotation(String annotationKey, VariantContext vc, boolean jitter) {
        double value;
        try {
            value = Double.parseDouble((String) vc.getAttribute(annotationKey));
            if (Double.isInfinite(value)) {
                value = Double.NaN;
            }
            if (jitter && annotationKey.equalsIgnoreCase("HRUN")) {
                value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
            }
            if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
                final int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length());
                if (eventLength > 0) {
                    value /= (double) eventLength;
                }
            }
            if (jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 1.0E-4) == 0) {
                value = -0.2 + 0.4 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
            }
            if (jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0) {
                value = -0.2 + 0.4 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
            }
        }
        catch (Exception ignored) {
            // Deliberate best effort: any failure to decode is reported as a missing (NaN) value.
            value = Double.NaN;
        }
        return value;
    }

    /**
     * Resets the training-related flags on {@code datum} and recomputes them from every
     * registered training set's records at {@code genomeLoc}. A record that passes
     * {@link #isValidVariant} contributes the set's known/truth/training flags, raises the
     * prior to at least the set's prior (the prior starts at 2.0) and, for consensus sets,
     * increments {@code consensusCount}. Any non-null record, valid or not, contributes the
     * set's anti-training flag. {@code consensusCount} is not reset by this method.
     *
     * @param tracker               source of the training-set records
     * @param genomeLoc             location to query
     * @param evalVC                the variant being evaluated
     * @param datum                 the datum to update
     * @param TRUST_ALL_POLYMORPHIC passed through to {@link #isValidVariant}
     */
    public void parseTrainingSets(RefMetaDataTracker tracker, GenomeLoc genomeLoc, VariantContext evalVC, VariantDatum datum, boolean TRUST_ALL_POLYMORPHIC) {
        datum.isKnown = false;
        datum.atTruthSite = false;
        datum.atTrainingSite = false;
        datum.atAntiTrainingSite = false;
        datum.prior = 2.0;
        for (TrainingSet trainingSet : this.trainingSets) {
            for (VariantContext trainVC : tracker.getValues(trainingSet.rodBinding, genomeLoc)) {
                if (this.isValidVariant(evalVC, trainVC, TRUST_ALL_POLYMORPHIC)) {
                    datum.isKnown = datum.isKnown || trainingSet.isKnown;
                    datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
                    datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
                    datum.prior = Math.max(datum.prior, trainingSet.prior);
                    datum.consensusCount = datum.consensusCount + (trainingSet.isConsensus ? 1 : 0);
                }
                if (trainVC != null) {
                    datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
                }
            }
        }
    }

    /**
     * Decides whether a training record may be used for the evaluated variant: the record must
     * be non-null, unfiltered and variant; the two must be of compatible type (both SNPs, or
     * each an indel or mixed); and, unless {@code TRUST_ALL_POLYMORPHIC} is set, a record that
     * has genotypes must be polymorphic in its samples.
     */
    private boolean isValidVariant(VariantContext evalVC, VariantContext trainVC, boolean TRUST_ALL_POLYMORPHIC) {
        if (trainVC == null || !trainVC.isNotFiltered() || !trainVC.isVariant()) {
            return false;
        }
        final boolean typesMatch = (evalVC.isSNP() && trainVC.isSNP())
                || ((evalVC.isIndel() || evalVC.isMixed()) && (trainVC.isIndel() || trainVC.isMixed()));
        if (!typesMatch) {
            return false;
        }
        return TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples();
    }

    /**
     * Writes one comma-separated line per datum: contig, start, stop, LOD (four decimals) and
     * the key of the worst annotation, or the literal {@code NULL} when
     * {@code worstAnnotation} is -1.
     *
     * @param RECAL_FILE destination stream
     */
    public void writeOutRecalibrationTable(PrintStream RECAL_FILE) {
        for (VariantDatum datum : this.data) {
            final String worstAnnotationName = datum.worstAnnotation != -1 ? this.annotationKeys.get(datum.worstAnnotation) : "NULL";
            RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s", datum.contig, datum.start, datum.stop, datum.lod, worstAnnotationName));
        }
    }
}

