/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.recalibration;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Gather;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatumOptimized;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.recalibration.RequiredCovariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.StandardCovariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

/**
 * Locus walker that builds the raw recalibration table.
 *
 * <p>For every locus that does not overlap one of the {@code knownSites} bindings, each usable
 * pileup base is recorded in a {@link RecalDatumOptimized} keyed by the read's covariate values
 * at that offset. Loci that do overlap a known site are left out of the table and only feed the
 * known-site mismatch statistics used by the sanity check in
 * {@link #validatingDbsnpMismatchRate(CountedData)}. When the traversal finishes the table is
 * written as CSV to {@code RECAL_FILE}.
 */
@BAQMode(ApplicationTime=BAQ.ApplicationTime.FORBIDDEN)
@By(value=DataSource.READS)
@ReadFilters(value={MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})
@Requires(value={DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(value=PartitionType.LOCUS)
public class CountCovariatesWalker
extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData>
implements TreeReducible<CountCovariatesWalker.CountedData> {
    /** Temporary read attribute: when present, the read is ignored at every locus. */
    private static final String SKIP_RECORD_ATTRIBUTE = "SKIP";
    /** Temporary read attribute: when present, the one-time per-read setup in {@code map} has already run. */
    private static final String SEEN_ATTRIBUTE = "SEEN";
    /** Temporary read attribute holding the {@code Comparable[][]} covariate values, indexed by read offset. */
    private static final String COVARS_ATTRIBUTE = "COVARS";
    @ArgumentCollection
    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    @Input(fullName="knownSites", shortName="knownSites", doc="A database of known polymorphic sites to skip over in the recalibration algorithm", required=false)
    public List<RodBinding<Feature>> knownSites = Collections.emptyList();
    @Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file")
    @Gather(value=CountCovariatesGatherer.class)
    public PrintStream RECAL_FILE;
    @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
    private boolean LIST_ONLY = false;
    @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
    private String[] COVARIATES = null;
    @Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)
    private boolean USE_STANDARD_COVARIATES = false;
    @Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
    private boolean DONT_SORT_OUTPUT = false;
    @Argument(fullName="run_without_dbsnp_potentially_ruining_quality", shortName="run_without_dbsnp_potentially_ruining_quality", required=false, doc="If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
    private boolean RUN_WITHOUT_DBSNP = false;
    /** Owns the nested map from covariate values to {@link RecalDatumOptimized} counters. */
    private final RecalDataManager dataManager = new RecalDataManager();
    /** Covariates in table-column order: the two required ones first, then standard and user-requested ones. */
    private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();
    /** The known-site mismatch rate must be at least this many times the novel rate, otherwise a warning is issued. */
    private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0;
    /** Number of reduce steps between sanity checks; doubled (up to {@code Integer.MAX_VALUE}) after each warning. */
    private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000;

    /**
     * Validates the command line and builds the ordered list of covariates.
     *
     * <p>With {@code --list} the available covariates are logged and the JVM exits. Otherwise the
     * list always starts with {@link ReadGroupCovariate} and {@link QualityScoreCovariate},
     * followed by the standard covariates (if requested, ordered by class name) and then any
     * {@code -cov} covariates that are not already included.
     *
     * @throws UserException.CommandLineException if no known sites were supplied without the
     *         explicit override, if the number of required covariate plugins is not two, or if a
     *         requested covariate name matches no plugin
     * @throws DynamicClassResolutionException if a covariate class cannot be instantiated
     */
    @Override
    public void initialize() {
        if (this.RAC.FORCE_PLATFORM != null) {
            this.RAC.DEFAULT_PLATFORM = this.RAC.FORCE_PLATFORM;
        }
        List<Class<Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
        List<Class<RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
        List<Class<StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
        if (this.LIST_ONLY) {
            logger.info("Available covariates:");
            for (Class<Covariate> covClass : covariateClasses) {
                logger.info(covClass.getSimpleName());
            }
            logger.info("");
            System.exit(0);
        }
        if (this.knownSites.isEmpty() && !this.RUN_WITHOUT_DBSNP) {
            throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
        }
        // The two required covariates are instantiated by hand below, so the plugin count must match.
        if (requiredClasses.size() != 2) {
            throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
        }
        this.requestedCovariates.add(new ReadGroupCovariate());
        this.requestedCovariates.add(new QualityScoreCovariate());
        if (this.USE_STANDARD_COVARIATES) {
            // Instantiate in class-name order so the column order does not depend on plugin discovery order.
            ArrayList<String> standardClassNames = new ArrayList<String>();
            for (Class<StandardCovariate> covClass : standardClasses) {
                standardClassNames.add(covClass.getName());
            }
            Collections.sort(standardClassNames);
            for (String className : standardClassNames) {
                for (Class<StandardCovariate> covClass : standardClasses) {
                    if (!covClass.getName().equals(className)) continue;
                    try {
                        Covariate covariate = covClass.newInstance();
                        this.requestedCovariates.add(covariate);
                    }
                    catch (Exception e) {
                        throw new DynamicClassResolutionException(covClass, e);
                    }
                }
            }
        }
        if (this.COVARIATES != null) {
            for (String requestedCovariateString : this.COVARIATES) {
                boolean foundClass = false;
                for (Class<Covariate> covClass : covariateClasses) {
                    if (!requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) continue;
                    foundClass = true;
                    // Required covariates, and standard ones when -standard is on, were already added above.
                    if (requiredClasses.contains(covClass) || this.USE_STANDARD_COVARIATES && standardClasses.contains(covClass)) continue;
                    try {
                        Covariate covariate = covClass.newInstance();
                        this.requestedCovariates.add(covariate);
                    }
                    catch (Exception e) {
                        throw new DynamicClassResolutionException(covClass, e);
                    }
                }
                if (foundClass) continue;
                throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
            }
        }
        logger.info("The covariates being used here: ");
        for (Covariate cov : this.requestedCovariates) {
            logger.info("\t" + cov.getClass().getSimpleName());
            cov.initialize(this.RAC);
        }
    }

    /**
     * Processes one locus.
     *
     * <p>If no known-site record overlaps the locus, every pileup base that passes the filters
     * below is added to the recalibration table. Otherwise the locus is counted as skipped and
     * its pileup only contributes to the known-site mismatch statistics.
     *
     * @param tracker reference-ordered data at this locus, queried for the {@code knownSites} bindings
     * @param ref     reference context supplying the reference base
     * @param context alignment context supplying the base pileup
     * @return the counts gathered at this locus
     */
    @Override
    public CountedData map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        CountedData counter = new CountedData();
        if (tracker.getValues(this.knownSites).size() == 0) {
            for (PileupElement p : context.getBasePileup()) {
                GATKSAMRecord gatkRead = p.getRead();
                int offset = p.getOffset();
                if (gatkRead.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE)) continue;
                if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) {
                    // First time this read is encountered: do the per-read setup once and cache
                    // the covariate values on the read so later loci can reuse them.
                    gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
                    RecalDataManager.parseSAMRecord(gatkRead, this.RAC);
                    if (this.RAC.SOLID_NOCALL_STRATEGY != RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION && RecalDataManager.checkNoCallColorSpace(gatkRead)) {
                        gatkRead.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
                        continue;
                    }
                    RecalDataManager.parseColorSpace(gatkRead);
                    gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, this.requestedCovariates));
                }
                // Bases with a non-positive quality are not counted.
                if (gatkRead.getBaseQualities()[offset] <= 0) continue;
                byte[] bases = gatkRead.getReadBases();
                byte refBase = ref.getBase();
                if (!BaseUtils.isRegularBase(bases[offset])) continue;
                // Locale.ROOT keeps the platform match independent of the JVM's default locale
                // (e.g. Turkish casing rules would otherwise turn "solid" into "SOL\u0130D").
                if (!gatkRead.getReadGroup().getPlatform().toUpperCase(Locale.ROOT).contains("SOLID") || this.RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || !RecalDataManager.isInconsistentColorSpace(gatkRead, offset)) {
                    this.updateDataFromRead(counter, gatkRead, offset, refBase);
                    continue;
                }
                // SOLiD base with an inconsistent color space: keep it out of the table and only tally it.
                if (refBase == bases[offset]) {
                    counter.solidInsertedReferenceBases++;
                    continue;
                }
                counter.otherColorSpaceInconsistency++;
            }
            counter.countedSites++;
        } else {
            counter.skippedSites++;
            CountCovariatesWalker.updateMismatchCounts(counter, context, ref.getBase());
        }
        return counter;
    }

    /**
     * Adds the pileup of a known-variant locus to the known-site (dbSNP) mismatch statistics.
     *
     * <p>This is only called for loci that overlap a known site, so it feeds
     * {@code dbSNPCountsMM}/{@code dbSNPCountsBases}; the novel counters are maintained by
     * {@link #updateDataFromRead}. Bases (read or reference) that do not map to a simple base
     * index are ignored.
     *
     * @param counter counts to update
     * @param context alignment context supplying the base pileup
     * @param refBase reference base at the locus
     */
    private static void updateMismatchCounts(CountedData counter, AlignmentContext context, byte refBase) {
        int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
        if (refBaseIndex == -1) {
            return;
        }
        for (PileupElement p : context.getBasePileup()) {
            int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(p.getBase());
            if (readBaseIndex == -1) continue;
            if (readBaseIndex != refBaseIndex) {
                counter.dbSNPCountsMM++;
            }
            counter.dbSNPCountsBases++;
        }
    }

    /**
     * Records one base of a read in the recalibration table and in the novel-site statistics.
     *
     * @param counter  counts to update
     * @param gatkRead read whose {@code COVARS} attribute has already been populated by {@code map}
     * @param offset   offset of the base within the read
     * @param refBase  reference base at the locus
     */
    private void updateDataFromRead(CountedData counter, GATKSAMRecord gatkRead, int offset, byte refBase) {
        NestedHashMap data = this.dataManager.data;
        Comparable[][] covars = (Comparable[][])gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
        Object[] key = covars[offset];
        RecalDatumOptimized datum = (RecalDatumOptimized)data.get(key);
        if (datum == null) {
            datum = (RecalDatumOptimized)data.put((Object)new RecalDatumOptimized(), true, key);
        }
        byte base = gatkRead.getReadBases()[offset];
        long curMismatches = datum.getNumMismatches();
        datum.incrementBaseCounts(base, refBase);
        counter.countedBases++;
        counter.novelCountsBases++;
        // The change in the datum's mismatch count tells us whether this base was a mismatch.
        counter.novelCountsMM += datum.getNumMismatches() - curMismatches;
    }

    /** @return an empty set of counts to start the reduction from */
    @Override
    public CountedData reduceInit() {
        return new CountedData();
    }

    /**
     * Folds the counts of one locus into the running total.
     *
     * @param mapped counts produced by {@code map} for one locus
     * @param sum    running total; modified in place
     * @return {@code sum}, after the periodic known-site sanity check
     */
    @Override
    public CountedData reduce(CountedData mapped, CountedData sum) {
        return this.validatingDbsnpMismatchRate(sum.add(mapped));
    }

    /**
     * Periodically checks that known-variant sites show a clearly higher mismatch rate than the
     * remaining sites, and warns the user when they do not.
     *
     * <p>The check runs once {@code lociSinceLastDbsnpCheck} reaches
     * {@code DBSNP_VALIDATION_CHECK_FREQUENCY} and is skipped while either class has no bases.
     * After each warning the check interval is doubled, saturating at {@code Integer.MAX_VALUE}
     * so it can never wrap to a negative value.
     *
     * @param counter running total; its check counter is updated in place
     * @return {@code counter}
     */
    private CountedData validatingDbsnpMismatchRate(CountedData counter) {
        if (++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY) {
            counter.lociSinceLastDbsnpCheck = 0;
            if (counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L) {
                double fractionMM_novel = (double)counter.novelCountsMM / (double)counter.novelCountsBases;
                double fractionMM_dbsnp = (double)counter.dbSNPCountsMM / (double)counter.dbSNPCountsBases;
                if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) {
                    Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel));
                    DBSNP_VALIDATION_CHECK_FREQUENCY = (int)Math.min(2L * (long)DBSNP_VALIDATION_CHECK_FREQUENCY, (long)Integer.MAX_VALUE);
                }
            }
        }
        return counter;
    }

    /**
     * Merges two partial totals.
     *
     * @param sum1 first partial total; modified in place
     * @param sum2 second partial total
     * @return {@code sum1}, after the periodic known-site sanity check
     */
    @Override
    public CountedData treeReduce(CountedData sum1, CountedData sum2) {
        return this.validatingDbsnpMismatchRate(sum1.add(sum2));
    }

    /**
     * Writes the recalibration table once the traversal is complete.
     *
     * @param sum final totals
     * @throws UserException.BadInput if no base was counted
     */
    @Override
    public void onTraversalDone(CountedData sum) {
        logger.info("Writing raw recalibration data...");
        if (sum.countedBases == 0L) {
            throw new UserException.BadInput("Could not find any usable data in the input BAM file(s).");
        }
        this.outputToCSV(sum, this.RECAL_FILE);
        logger.info("...done!");
    }

    /**
     * Writes the summary comment lines, the CSV header, the table rows and a final {@code EOF}
     * marker.
     *
     * <p>The ratio lines use floating-point division, so a zero denominator (for example no
     * skipped sites) is printed as {@code Infinity} rather than raising an error.
     *
     * @param sum              final totals
     * @param recalTableStream destination stream
     */
    private void outputToCSV(CountedData sum, PrintStream recalTableStream) {
        recalTableStream.printf("# Counted Sites    %d%n", sum.countedSites);
        recalTableStream.printf("# Counted Bases    %d%n", sum.countedBases);
        recalTableStream.printf("# Skipped Sites    %d%n", sum.skippedSites);
        recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)sum.countedSites / (double)sum.skippedSites);
        if (sum.solidInsertedReferenceBases != 0L) {
            recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double)sum.countedBases / (double)sum.solidInsertedReferenceBases);
            recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double)sum.countedBases / (double)sum.otherColorSpaceInconsistency);
        }
        // Column names are the covariate class names with the "Covariate" suffix removed.
        for (Covariate cov : this.requestedCovariates) {
            recalTableStream.print(cov.getClass().getSimpleName().split("Covariate")[0] + ",");
        }
        recalTableStream.println("nObservations,nMismatches,Qempirical");
        if (this.DONT_SORT_OUTPUT) {
            this.printMappings(recalTableStream, 0, new Object[this.requestedCovariates.size()], this.dataManager.data.data);
        } else {
            this.printMappingsSorted(recalTableStream, 0, new Object[this.requestedCovariates.size()], this.dataManager.data.data);
        }
        recalTableStream.println("EOF");
    }

    /**
     * Recursively prints the nested table, visiting the keys of every level in sorted order.
     *
     * @param recalTableStream destination stream
     * @param curPos           nesting depth, i.e. the index in {@code key} filled at this level
     * @param key              scratch array holding the covariate values on the current path
     * @param data             map for the current level; values are either nested maps or leaf datums
     */
    private void printMappingsSorted(PrintStream recalTableStream, int curPos, Object[] key, Map data) {
        ArrayList<Comparable> keyList = new ArrayList<Comparable>();
        for (Object k : data.keySet()) {
            keyList.add((Comparable)k);
        }
        Collections.sort(keyList);
        for (Comparable comparable : keyList) {
            key[curPos] = comparable;
            Object val = data.get(comparable);
            if (val instanceof RecalDatumOptimized) {
                CountCovariatesWalker.printTableRow(recalTableStream, key, (RecalDatumOptimized)val);
            } else {
                this.printMappingsSorted(recalTableStream, curPos + 1, key, (Map)val);
            }
        }
    }

    /**
     * Recursively prints the nested table in the map's own iteration order.
     *
     * @param recalTableStream destination stream
     * @param curPos           nesting depth, i.e. the index in {@code key} filled at this level
     * @param key              scratch array holding the covariate values on the current path
     * @param data             map for the current level; values are either nested maps or leaf datums
     */
    private void printMappings(PrintStream recalTableStream, int curPos, Object[] key, Map data) {
        for (Object entryObject : data.entrySet()) {
            Map.Entry entry = (Map.Entry)entryObject;
            key[curPos] = entry.getKey();
            Object val = entry.getValue();
            if (val instanceof RecalDatumOptimized) {
                CountCovariatesWalker.printTableRow(recalTableStream, key, (RecalDatumOptimized)val);
            } else {
                this.printMappings(recalTableStream, curPos + 1, key, (Map)val);
            }
        }
    }

    /** Prints one CSV row: every covariate value followed by a comma, then the datum's own CSV text. */
    private static void printTableRow(PrintStream recalTableStream, Object[] key, RecalDatumOptimized datum) {
        for (Object compToPrint : key) {
            recalTableStream.print(compToPrint + ",");
        }
        recalTableStream.println(datum.outputToCSV());
    }

    /**
     * Mutable bundle of the counts gathered during the traversal; used both as the per-locus map
     * result and as the reduction accumulator.
     */
    public static class CountedData {
        /** Loci that were processed (no known site overlapped them). */
        private long countedSites = 0L;
        /** Bases added to the recalibration table. */
        private long countedBases = 0L;
        /** Loci skipped because a known site overlapped them. */
        private long skippedSites = 0L;
        /** Inconsistent-color-space SOLiD bases that equal the reference base. */
        private long solidInsertedReferenceBases = 0L;
        /** Inconsistent-color-space SOLiD bases that differ from the reference base. */
        private long otherColorSpaceInconsistency = 0L;
        /** Mismatching bases seen at known-variant loci. */
        private long dbSNPCountsMM = 0L;
        /** Bases seen at known-variant loci. */
        private long dbSNPCountsBases = 0L;
        /** Mismatching bases seen at all other loci. */
        private long novelCountsMM = 0L;
        /** Bases seen at all other loci. */
        private long novelCountsBases = 0L;
        /** Reduce steps since the last known-site sanity check; long so merged sums cannot wrap. */
        private long lociSinceLastDbsnpCheck = 0L;

        /**
         * Adds every count of {@code other} to this instance.
         *
         * @param other counts to add; not modified
         * @return this instance
         */
        public CountedData add(CountedData other) {
            this.countedSites += other.countedSites;
            this.countedBases += other.countedBases;
            this.skippedSites += other.skippedSites;
            this.solidInsertedReferenceBases += other.solidInsertedReferenceBases;
            this.otherColorSpaceInconsistency += other.otherColorSpaceInconsistency;
            this.dbSNPCountsMM += other.dbSNPCountsMM;
            this.dbSNPCountsBases += other.dbSNPCountsBases;
            this.novelCountsMM += other.novelCountsMM;
            this.novelCountsBases += other.novelCountsBases;
            this.lociSinceLastDbsnpCheck += other.lociSinceLastDbsnpCheck;
            return this;
        }
    }
}

