/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.variantutils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;

/**
 * Walker that copies a subset of the records of the input variant track to a VCF writer.
 *
 * <p>Records can be restricted by sample (include/exclude lists, files and expressions), by
 * concordance/discordance with a comparison track, by allelicity, by variant type, by JEXL
 * select expressions, by record ID, by Mendelian-violation status, and by random selection of
 * either a fixed number or a fraction of the passing records.
 *
 * <p>{@link #map} returns 1 for every location that carried at least one input record, so the
 * reduced value logged at the end of the traversal counts locations rather than emitted records.
 */
public class SelectVariants
extends RodWalker<Integer, Integer>
implements TreeReducible<Integer> {
    // ---------------------------------------------------------------------------------------
    // Command-line arguments. Names, types and annotation strings are part of the tool's
    // external interface and must not change.
    // ---------------------------------------------------------------------------------------
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    @Input(fullName="discordance", shortName="disc", doc="Output variants that were not called in this comparison track", required=false)
    private RodBinding<VariantContext> discordanceTrack;
    @Input(fullName="concordance", shortName="conc", doc="Output variants that were also called in this comparison track", required=false)
    private RodBinding<VariantContext> concordanceTrack;
    @Output(doc="File to which variants should be written", required=true)
    protected VCFWriter vcfWriter = null;
    @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> sampleNames = new HashSet<String>(0);
    @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
    public Set<String> sampleExpressions;
    @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
    public Set<File> sampleFiles;
    @Argument(fullName="exclude_sample_name", shortName="xl_sn", doc="Exclude genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> XLsampleNames = new HashSet<String>(0);
    @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
    public Set<File> XLsampleFiles = new HashSet<File>(0);
    @Argument(shortName="select", doc="One or more criteria to use when selecting the data", required=false)
    public ArrayList<String> SELECT_EXPRESSIONS = new ArrayList<String>();
    @Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure", required=false)
    private boolean EXCLUDE_NON_VARIANTS = false;
    @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
    private boolean EXCLUDE_FILTERED = false;
    @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
    private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
    @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
    private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
    @Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only", required=false)
    private Boolean MENDELIAN_VIOLATIONS = false;
    @Argument(fullName="mendelianViolationQualThreshold", shortName="mvq", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
    private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0.0;
    @Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
    private int numRandom = 0;
    @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
    private double fractionRandom = 0.0;
    @Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
    private double fractionGenotypes = 0.0;
    @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
    private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
    @Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
    private File rsIDFile = null;
    @Hidden
    @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
    private String outMVFile = null;

    // ---------------------------------------------------------------------------------------
    // Derived state, populated in initialize() and updated during the traversal.
    // ---------------------------------------------------------------------------------------
    /** Variant types that are allowed through; all types when none were requested. */
    private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
    /** Generated names ("select-0", "select-1", ...) paired with SELECT_EXPRESSIONS. */
    private ArrayList<String> selectNames = new ArrayList<String>();
    private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
    /** Final, sorted set of samples written to the output. */
    private TreeSet<String> samples = new TreeSet<String>();
    /** True when no include criteria were supplied and every sample of the input was taken. */
    private boolean NO_SAMPLES_SPECIFIED = false;
    private boolean DISCORDANCE_ONLY = false;
    private boolean CONCORDANCE_ONLY = false;
    /** Only created when the mendelianViolation flag is set; null otherwise. */
    private MendelianViolation mv;
    private boolean SELECT_RANDOM_NUMBER = false;
    private boolean SELECT_RANDOM_FRACTION = false;
    /** Running count of records offered to the random-number selection. */
    private int variantNumber = 0;
    /** Number of stores into variantArray (initial fills plus replacements). */
    private int nVariantsAdded = 0;
    /** Slot of variantArray that the next replacement overwrites. */
    private int positionToAdd = 0;
    private RandomVariantStructure[] variantArray;
    /** Only opened when both mendelianViolation and outMVFile are given; null otherwise. */
    private PrintStream outMVFileStream = null;
    private Random randomGenotypes = new Random();
    /** IDs loaded from rsIDFile, or null when no ID filtering was requested. */
    private Set<String> IDsToKeep = null;

    /**
     * Resolves the sample set, writes the output VCF header and prepares every selection mode
     * requested on the command line.
     *
     * @throws UserException if every requested sample was also excluded, if the ID file cannot
     *         be read, or if the Mendelian-violation report file cannot be created
     */
    @Override
    public void initialize() {
        List<String> rodNames = Arrays.asList(this.variantCollection.variants.getName());
        Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(this.getToolkit(), rodNames);

        this.initializeSamples(vcfRods);

        // No explicit type selection means every variant type is accepted.
        if (this.TYPES_TO_INCLUDE.isEmpty()) {
            this.selectedTypes.addAll(Arrays.asList(VariantContext.Type.values()));
        } else {
            this.selectedTypes.addAll(this.TYPES_TO_INCLUDE);
        }

        this.writeOutputHeader(vcfRods);

        // The select expressions are only used to drop records, so synthetic names suffice.
        for (int i = 0; i < this.SELECT_EXPRESSIONS.size(); ++i) {
            this.selectNames.add(String.format("select-%d", i));
        }
        this.jexls = VariantContextUtils.initializeMatchExps(this.selectNames, this.SELECT_EXPRESSIONS);

        this.DISCORDANCE_ONLY = this.discordanceTrack.isBound();
        if (this.DISCORDANCE_ONLY) {
            logger.info("Selecting only variants discordant with the track: " + this.discordanceTrack.getName());
        }
        this.CONCORDANCE_ONLY = this.concordanceTrack.isBound();
        if (this.CONCORDANCE_ONLY) {
            logger.info("Selecting only variants concordant with the track: " + this.concordanceTrack.getName());
        }

        this.initializeMendelianViolations();

        this.SELECT_RANDOM_NUMBER = this.numRandom > 0;
        if (this.SELECT_RANDOM_NUMBER) {
            logger.info("Selecting " + this.numRandom + " variants at random from the variant track");
            // Slots stay null until randomlyAddVariant fills them.
            this.variantArray = new RandomVariantStructure[this.numRandom];
        }
        this.SELECT_RANDOM_FRACTION = this.fractionRandom > 0.0;
        if (this.SELECT_RANDOM_FRACTION) {
            logger.info("Selecting approximately " + 100.0 * this.fractionRandom + "% of the variants at random from the variant track");
        }

        this.loadIDsToKeep();
    }

    /**
     * Builds the final sample set: requested samples (files, expressions, names) or, when none
     * were requested, every sample of the input; minus the excluded samples.
     */
    private void initializeSamples(Map<String, VCFHeader> vcfRods) {
        TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
        Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(this.sampleFiles);
        Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, this.sampleExpressions);

        this.samples.addAll(samplesFromFile);
        this.samples.addAll(samplesFromExpressions);
        this.samples.addAll(this.sampleNames);
        if (this.samples.isEmpty()) {
            this.samples.addAll(vcfSamples);
            this.NO_SAMPLES_SPECIFIED = true;
        }

        Collection<String> excludedFromFile = SampleUtils.getSamplesFromFiles(this.XLsampleFiles);
        this.samples.removeAll(excludedFromFile);
        this.samples.removeAll(this.XLsampleNames);

        // An empty set is only an error when the user explicitly asked for samples.
        if (this.samples.isEmpty() && !this.NO_SAMPLES_SPECIFIED) {
            throw new UserException("All samples requested to be included were also requested to be excluded.");
        }
        for (String sample : this.samples) {
            logger.info("Including sample '" + sample + "'");
        }
    }

    /** Merges the input headers, adds the lines owned by this tool and writes the VCF header. */
    private void writeOutputHeader(Map<String, VCFHeader> vcfRods) {
        Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
        headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            // NOTE(review): subsetRecord stores AC_Orig/AF_Orig/AN_Orig as site-level attributes,
            // yet they are declared here as FORMAT lines - confirm whether INFO lines are intended.
            headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
            headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
            headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
        }
        this.vcfWriter.writeHeader(new VCFHeader(headerLines, this.samples));
    }

    /**
     * Creates the Mendelian-violation checker and, when requested, opens the report file.
     * The report relies on the checker, so outMVFile is ignored without the mendelianViolation flag.
     */
    private void initializeMendelianViolations() {
        if (!this.MENDELIAN_VIOLATIONS.booleanValue()) {
            if (this.outMVFile != null) {
                logger.warn("outMVFile is ignored because mendelianViolation was not requested");
            }
            return;
        }
        this.mv = new MendelianViolation(this.MENDELIAN_VIOLATION_QUAL_THRESHOLD, false, true);
        if (this.outMVFile != null) {
            try {
                this.outMVFileStream = new PrintStream(this.outMVFile);
            }
            catch (FileNotFoundException e) {
                throw new UserException("Could not create Mendelian violation output file " + this.outMVFile + ": " + e.getMessage());
            }
        }
    }

    /** Loads the IDs to keep (one per line, trimmed) when an ID file was supplied. */
    private void loadIDsToKeep() {
        if (this.rsIDFile == null) {
            return;
        }
        this.IDsToKeep = new HashSet<String>();
        try {
            for (String line : new XReadLines(this.rsIDFile).readLines()) {
                this.IDsToKeep.add(line.trim());
            }
            logger.info("Selecting only variants with one of " + this.IDsToKeep.size() + " IDs from " + this.rsIDFile);
        }
        catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(this.rsIDFile, e);
        }
    }

    /**
     * Applies every selection criterion to the input records at the current location and either
     * writes the surviving, sample-subsetted records or hands them to the random-number selection.
     *
     * @param tracker the reference meta data at this location; may be null
     * @param ref the reference context (unused)
     * @param context the alignment context, used for its location
     * @return 0 when there is no tracker or no input record at this location, 1 otherwise
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        List<VariantContext> vcs = tracker.getValues(this.variantCollection.variants, context.getLocation());
        if (vcs == null || vcs.isEmpty()) {
            return 0;
        }
        for (VariantContext vc : vcs) {
            if (this.IDsToKeep != null && !this.IDsToKeep.contains(vc.getID())) {
                continue;
            }
            // Skip only this record (not the remaining records at the locus) when it shows no
            // violation, consistent with every other per-record filter in this loop.
            if (this.MENDELIAN_VIOLATIONS.booleanValue()
                    && this.mv.countViolations(this.getSampleDB().getFamilies(this.samples), vc) < 1) {
                continue;
            }
            this.reportMendelianViolations(vc);

            if (!this.passesSiteFilters(tracker, context, vc)) {
                continue;
            }

            // Subsetting happens only after the cheap site-level filters have passed.
            VariantContext sub = this.subsetRecord(vc, this.samples);
            if (!sub.isPolymorphicInSamples() && this.EXCLUDE_NON_VARIANTS) {
                continue;
            }
            if (sub.isFiltered() && this.EXCLUDE_FILTERED) {
                continue;
            }
            if (!this.matchesAllSelectExpressions(sub)) {
                continue;
            }

            if (this.SELECT_RANDOM_NUMBER) {
                // Buffered; written out in onTraversalDone.
                this.randomlyAddVariant(++this.variantNumber, sub);
                continue;
            }
            if (this.SELECT_RANDOM_FRACTION && !(GenomeAnalysisEngine.getRandomGenerator().nextDouble() < this.fractionRandom)) {
                continue;
            }
            this.vcfWriter.add(sub);
        }
        return 1;
    }

    /**
     * Writes one report line per child (a sample with at least one parent) of every family the
     * checker currently lists as violating. Does nothing unless the report stream is open.
     */
    private void reportMendelianViolations(VariantContext vc) {
        if (this.outMVFileStream == null || this.mv == null) {
            return;
        }
        for (String familyId : this.mv.getViolationFamilies()) {
            for (Sample sample : this.getSampleDB().getFamily(familyId)) {
                if (sample.getParents().size() <= 0) {
                    continue;
                }
                this.outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, childG=%s childGL=%s\n", vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), sample.getMaternalID(), sample.getPaternalID(), sample.getID(), vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), vc.getGenotype(sample.getID()).toBriefString(), vc.getGenotype(sample.getID()).getLikelihoods().getAsString());
            }
        }
    }

    /**
     * Checks the filters that only need the unsubsetted record: discordance, concordance,
     * allelicity restriction and variant type, in that order.
     */
    private boolean passesSiteFilters(RefMetaDataTracker tracker, AlignmentContext context, VariantContext vc) {
        if (this.DISCORDANCE_ONLY
                && !this.isDiscordant(vc, tracker.getValues(this.discordanceTrack, context.getLocation()))) {
            return false;
        }
        if (this.CONCORDANCE_ONLY
                && !this.isConcordant(vc, tracker.getValues(this.concordanceTrack, context.getLocation()))) {
            return false;
        }
        if (this.alleleRestriction == NumberAlleleRestriction.BIALLELIC && !vc.isBiallelic()) {
            return false;
        }
        if (this.alleleRestriction == NumberAlleleRestriction.MULTIALLELIC && vc.isBiallelic()) {
            return false;
        }
        return this.selectedTypes.contains(vc.getType());
    }

    /** Returns true when the record matches every JEXL select expression (or there are none). */
    private boolean matchesAllSelectExpressions(VariantContext sub) {
        for (VariantContextUtils.JexlVCMatchExp jexl : this.jexls) {
            if (!VariantContextUtils.match(sub, jexl)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decides whether a record is discordant with the comparison records.
     *
     * <p>Without explicit samples, a record is discordant when there is no comparison record.
     * Otherwise it is discordant when some selected sample carries a variant genotype that no
     * comparison record reproduces.
     */
    private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED) {
            return compVCs == null || compVCs.isEmpty();
        }
        GenotypesContext genotypes = vc.getGenotypes((Set<String>)this.samples);
        for (Genotype g : genotypes) {
            if (!this.sampleHasVariant(g)) {
                continue;
            }
            if (compVCs == null) {
                return true;
            }
            boolean reproduced = false;
            for (VariantContext compVC : compVCs) {
                if (this.haveSameGenotypes(g, compVC.getGenotype(g.getSampleName()))) {
                    reproduced = true;
                    break;
                }
            }
            if (!reproduced) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a record is concordant with the comparison records.
     *
     * <p>Requires at least one comparison record. Without explicit samples that is sufficient;
     * otherwise every selected sample present in the record must have its genotype reproduced
     * by some comparison record.
     */
    private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null || compVCs == null || compVCs.isEmpty()) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED) {
            return true;
        }
        // Work on a copy: retainAll must not touch the collection owned by the record.
        Set<String> variantSamples = new HashSet<String>(vc.getSampleNames());
        variantSamples.retainAll(this.samples);
        for (String sample : variantSamples) {
            Genotype varG = vc.getGenotype(sample);
            boolean reproduced = false;
            for (VariantContext compVC : compVCs) {
                if (this.haveSameGenotypes(varG, compVC.getGenotype(sample))) {
                    reproduced = true;
                    break;
                }
            }
            if (!reproduced) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true for a non-null, non-hom-ref genotype that is called, or that is filtered
     * while filtered loci are not being excluded.
     */
    private boolean sampleHasVariant(Genotype g) {
        return g != null && !g.isHomRef() && (g.isCalled() || g.isFiltered() && !this.EXCLUDE_FILTERED);
    }

    /**
     * Compares two genotypes by filter status and allele content.
     *
     * @return false when either genotype is missing, when one is called and the other filtered,
     *         or when both are filtered and filtered loci are excluded; otherwise true exactly
     *         when each allele list contains all alleles of the other
     */
    private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
        // A sample absent from one of the records cannot have the same genotype.
        if (g1 == null || g2 == null) {
            return false;
        }
        if (g1.isCalled() && g2.isFiltered() || g2.isCalled() && g1.isFiltered() || g1.isFiltered() && g2.isFiltered() && this.EXCLUDE_FILTERED) {
            return false;
        }
        List<Allele> a1s = g1.getAlleles();
        List<Allele> a2s = g2.getAlleles();
        return a1s.containsAll(a2s) && a2s.containsAll(a1s);
    }

    /** Starts the reduction at zero. */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /** Adds a map result to the running total. */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /** Adds two partial totals. */
    @Override
    public Integer treeReduce(Integer lhs, Integer rhs) {
        return lhs + rhs;
    }

    /**
     * Logs the total, writes the buffered records of the random-number selection and closes the
     * Mendelian-violation report if one was opened.
     *
     * @param result the reduced total of all map results
     */
    @Override
    public void onTraversalDone(Integer result) {
        logger.info(result + " records processed.");
        if (this.SELECT_RANDOM_NUMBER) {
            // Fewer records than requested may have passed the filters; the untouched slots are
            // null and must not be printed. While the buffer is not full, positionToAdd is still 0.
            int stored = Math.min(this.numRandom, this.nVariantsAdded);
            int positionToPrint = this.positionToAdd;
            for (int i = 0; i < stored; ++i) {
                this.vcfWriter.add(this.variantArray[positionToPrint].vc);
                positionToPrint = this.nextCircularPosition(positionToPrint);
            }
        }
        if (this.outMVFileStream != null) {
            this.outMVFileStream.close();
        }
    }

    /**
     * Restricts a record to the given samples and refreshes its derived annotations.
     *
     * <p>The result carries the subsetted genotypes (with a random fraction replaced by no-calls
     * when fractionGenotypes is positive), recomputed chromosome counts, optionally the original
     * AC/AF/AN values under *_Orig keys, and a DP attribute summed over the DP values of the
     * non-filtered genotypes.
     *
     * @param vc the record to subset
     * @param samples the samples to keep; when null or empty the record is returned unchanged
     * @return the subsetted record
     */
    private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
        if (samples == null || samples.isEmpty()) {
            return vc;
        }
        VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
        VariantContextBuilder builder = new VariantContextBuilder(sub);

        GenotypesContext newGC = sub.getGenotypes();
        // PLs are stripped whenever subsetting changed the number of alleles.
        if (vc.getAlleles().size() != sub.getAlleles().size()) {
            newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
        }
        if (this.fractionGenotypes > 0.0) {
            ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
            for (Genotype genotype : newGC) {
                if (this.randomGenotypes.nextDouble() < this.fractionGenotypes) {
                    // Replace the genotype by a diploid no-call, keeping sample name and filters.
                    ArrayList<Allele> alleles = new ArrayList<Allele>(2);
                    alleles.add(Allele.create((byte)'.'));
                    alleles.add(Allele.create((byte)'.'));
                    genotypes.add(new Genotype(genotype.getSampleName(), alleles, 1.0, genotype.getFilters(), new HashMap<String, Object>(), false));
                } else {
                    genotypes.add(genotype);
                }
            }
            newGC = GenotypesContext.create(genotypes);
        }
        builder.genotypes(newGC);

        int depth = 0;
        for (String sample : sub.getSampleNames()) {
            Genotype g = sub.getGenotype(sample);
            if (!g.isNotFiltered()) {
                continue;
            }
            // toString() instead of a String cast so that non-String DP values do not throw.
            Object dpAttribute = g.getAttribute("DP");
            if (dpAttribute == null) {
                continue;
            }
            String dp = dpAttribute.toString();
            if (dp.equals("-1") || dp.equals(".")) {
                continue;
            }
            depth += Integer.parseInt(dp);
        }

        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            if (sub.hasAttribute("AC")) {
                builder.attribute("AC_Orig", sub.getAttribute("AC"));
            }
            if (sub.hasAttribute("AF")) {
                builder.attribute("AF_Orig", sub.getAttribute("AF"));
            }
            if (sub.hasAttribute("AN")) {
                builder.attribute("AN_Orig", sub.getAttribute("AN"));
            }
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        builder.attribute("DP", depth);
        return builder.make();
    }

    /**
     * Offers a record to the random-number selection. The first numRandom records fill the
     * buffer; each later record replaces the slot at positionToAdd with probability
     * 1 / (rank - numRandom + 1).
     *
     * <p>NOTE(review): this replacement probability and the fixed replacement slot differ from
     * textbook reservoir sampling - confirm whether a uniform sample is required.
     *
     * @param rank 1-based count of records offered so far, including this one
     * @param vc the record to offer
     */
    private void randomlyAddVariant(int rank, VariantContext vc) {
        if (this.nVariantsAdded < this.numRandom) {
            this.variantArray[this.nVariantsAdded++] = new RandomVariantStructure(vc);
            return;
        }
        double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
        double t = 1.0 / (double)(rank - this.numRandom + 1);
        if (v < t) {
            this.variantArray[this.positionToAdd].set(vc);
            ++this.nVariantsAdded;
            this.positionToAdd = this.nextCircularPosition(this.positionToAdd);
        }
    }

    /** Returns the index after cur in variantArray, wrapping to 0 past the last slot. */
    private int nextCircularPosition(int cur) {
        if (cur + 1 == this.variantArray.length) {
            return 0;
        }
        return cur + 1;
    }

    /** Allelicity restriction selectable through restrictAllelesTo. */
    public static enum NumberAlleleRestriction {
        ALL,
        BIALLELIC,
        MULTIALLELIC;

    }

    /** Mutable holder for one buffered record of the random-number selection. */
    private static class RandomVariantStructure {
        private VariantContext vc;

        RandomVariantStructure(VariantContext vcP) {
            this.vc = vcP;
        }

        public void set(VariantContext vcP) {
            this.vc = vcP;
        }
    }
}

