/*
 * Decompiled with CFR 0.152.
 */
package net.sf.picard.sam;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.sam.AbstractDuplicateFindingAlgorithm;
import net.sf.picard.sam.DiskReadEndsMap;
import net.sf.picard.sam.DuplicationMetrics;
import net.sf.picard.sam.MergingSamRecordIterator;
import net.sf.picard.sam.ReadEnds;
import net.sf.picard.sam.ReadEndsCodec;
import net.sf.picard.sam.ReservedTagConstants;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.picard.util.Histogram;
import net.sf.picard.util.Log;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.SortingCollection;
import net.sf.samtools.util.SortingLongCollection;

public class MarkDuplicates
extends AbstractDuplicateFindingAlgorithm {
    private final Log log = Log.getInstance(MarkDuplicates.class);
    @Usage
    public final String USAGE = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.";
    @Option(shortName="I", doc="One or more input SAM or BAM files to analyze.  Must be coordinate sorted.")
    public List<File> INPUT;
    @Option(shortName="O", doc="The output file to right marked records to")
    public File OUTPUT;
    @Option(shortName="M", doc="File to write duplication metrics to")
    public File METRICS_FILE;
    @Option(doc="Comment(s) to include in the output file's header.", optional=true, shortName="CO")
    public List<String> COMMENT = new ArrayList<String>();
    @Option(doc="If true do not write duplicates to the output file instead of writing them with appropriate flags set.")
    public boolean REMOVE_DUPLICATES = false;
    @Option(doc="If true, assume that the input file is coordinate sorted, even if the header says otherwise.", shortName="AS")
    public boolean ASSUME_SORTED = false;
    @Option(doc="This option is obsolete.  ReadEnds will always be spilled to disk.", shortName="MAX_SEQS")
    public int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = 50000;
    @Option(doc="Maximum number of file handles to keep open when spilling read ends to disk.  Set this number a little lower than the per-process maximum number of file that may be open.  This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName="MAX_FILE_HANDLES")
    public int MAX_FILE_HANDLES_FOR_READ_ENDS_MAP = 8000;
    @Option(doc="This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections.  If you are running out of memory, try reducing this number.")
    public double SORTING_COLLECTION_SIZE_RATIO = 0.25;
    private SortingCollection<ReadEnds> pairSort;
    private SortingCollection<ReadEnds> fragSort;
    private SortingLongCollection duplicateIndexes;
    private int numDuplicateIndices = 0;
    private final Map<String, Short> libraryIds = new HashMap<String, Short>();
    private short nextLibraryId = 1;
    private final Histogram<Short> opticalDupesByLibraryId = new Histogram();

    public static void main(String[] args) {
        System.exit(new MarkDuplicates().instanceMain(args));
    }

    @Override
    protected int doWork() {
        for (File f : this.INPUT) {
            IoUtil.assertFileIsReadable(f);
        }
        IoUtil.assertFileIsWritable(this.OUTPUT);
        IoUtil.assertFileIsWritable(this.METRICS_FILE);
        this.reportMemoryStats("Start of doWork");
        this.log.info("Reading input file and constructing read end information.");
        this.buildSortedReadEndLists();
        this.reportMemoryStats("After buildSortedReadEndLists");
        this.generateDuplicateIndexes();
        this.reportMemoryStats("After generateDuplicateIndexes");
        this.log.info("Marking " + this.numDuplicateIndices + " records as duplicates.");
        this.log.info("Found " + (long)this.opticalDupesByLibraryId.getSumOfValues() + " optical duplicate clusters.");
        HashMap<String, DuplicationMetrics> metricsByLibrary = new HashMap<String, DuplicationMetrics>();
        SamHeaderAndIterator headerAndIterator = this.openInputs();
        SAMFileHeader header = headerAndIterator.header;
        SAMFileHeader outputHeader = header.clone();
        outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        for (String comment : this.COMMENT) {
            outputHeader.addComment(comment);
        }
        SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, true, this.OUTPUT);
        long recordInFileIndex = 0L;
        long nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
        for (SAMReadGroupRecord readGroup : header.getReadGroups()) {
            String library = readGroup.getLibrary();
            DuplicationMetrics metrics = (DuplicationMetrics)metricsByLibrary.get(library);
            if (metrics != null) continue;
            metrics = new DuplicationMetrics();
            metrics.LIBRARY = library;
            metricsByLibrary.put(library, metrics);
        }
        long written = 0L;
        CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
        while (iterator.hasNext()) {
            SAMRecord rec = (SAMRecord)iterator.next();
            if (!rec.getNotPrimaryAlignmentFlag()) {
                String library = this.getLibraryName(header, rec);
                DuplicationMetrics metrics = (DuplicationMetrics)metricsByLibrary.get(library);
                if (metrics == null) {
                    metrics = new DuplicationMetrics();
                    metrics.LIBRARY = library;
                    metricsByLibrary.put(library, metrics);
                }
                if (rec.getReadUnmappedFlag()) {
                    ++metrics.UNMAPPED_READS;
                } else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                    ++metrics.UNPAIRED_READS_EXAMINED;
                } else {
                    ++metrics.READ_PAIRS_EXAMINED;
                }
                if (recordInFileIndex == nextDuplicateIndex) {
                    rec.setDuplicateReadFlag(true);
                    if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                        ++metrics.UNPAIRED_READ_DUPLICATES;
                    } else {
                        ++metrics.READ_PAIR_DUPLICATES;
                    }
                    nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
                } else {
                    rec.setDuplicateReadFlag(false);
                }
            }
            ++recordInFileIndex;
            if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) continue;
            out.addAlignment(rec);
            if (++written % 10000000L != 0L) continue;
            this.log.info("Written " + written + " records.");
        }
        this.duplicateIndexes.cleanup();
        this.reportMemoryStats("Before output close");
        out.close();
        this.reportMemoryStats("After output close");
        MetricsFile file = this.getMetricsFile();
        for (Map.Entry entry : metricsByLibrary.entrySet()) {
            Histogram.Bin bin;
            String libraryName = (String)entry.getKey();
            DuplicationMetrics metrics = (DuplicationMetrics)entry.getValue();
            metrics.READ_PAIRS_EXAMINED /= 2L;
            metrics.READ_PAIR_DUPLICATES /= 2L;
            Short libraryId = this.libraryIds.get(libraryName);
            if (libraryId != null && (bin = (Histogram.Bin)this.opticalDupesByLibraryId.get(libraryId)) != null) {
                metrics.READ_PAIR_OPTICAL_DUPLICATES = (long)bin.getValue();
            }
            metrics.calculateDerivedMetrics();
            file.addMetric(metrics);
        }
        if (metricsByLibrary.size() == 1) {
            file.setHistogram(((DuplicationMetrics)metricsByLibrary.values().iterator().next()).calculateRoiHistogram());
        }
        file.write(this.METRICS_FILE);
        return 0;
    }

    private SamHeaderAndIterator openInputs() {
        ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(this.INPUT.size());
        ArrayList<SAMFileReader> readers = new ArrayList<SAMFileReader>(this.INPUT.size());
        for (File f : this.INPUT) {
            SAMFileReader reader = new SAMFileReader(f);
            SAMFileHeader header = reader.getFileHeader();
            if (!this.ASSUME_SORTED && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                throw new PicardException("Input file " + f.getAbsolutePath() + " is not coordinate sorted.");
            }
            headers.add(header);
            readers.add(reader);
        }
        if (headers.size() == 1) {
            return new SamHeaderAndIterator((SAMFileHeader)headers.get(0), ((SAMFileReader)readers.get(0)).iterator());
        }
        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false);
        MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, this.ASSUME_SORTED);
        return new SamHeaderAndIterator(headerMerger.getMergedHeader(), iterator);
    }

    private void reportMemoryStats(String stage) {
        System.gc();
        Runtime runtime = Runtime.getRuntime();
        this.log.info(stage + " freeMemory: " + runtime.freeMemory() + "; totalMemory: " + runtime.totalMemory() + "; maxMemory: " + runtime.maxMemory());
    }

    private void buildSortedReadEndLists() {
        int maxInMemory = (int)((double)Runtime.getRuntime().maxMemory() * this.SORTING_COLLECTION_SIZE_RATIO / 63.0);
        this.log.info("Will retain up to " + maxInMemory + " data points before spilling to disk.");
        this.pairSort = SortingCollection.newInstance(ReadEnds.class, new ReadEndsCodec(), new ReadEndsComparator(), maxInMemory, this.TMP_DIR);
        this.fragSort = SortingCollection.newInstance(ReadEnds.class, new ReadEndsCodec(), new ReadEndsComparator(), maxInMemory, this.TMP_DIR);
        SamHeaderAndIterator headerAndIterator = this.openInputs();
        SAMFileHeader header = headerAndIterator.header;
        DiskReadEndsMap tmp = new DiskReadEndsMap(this.MAX_FILE_HANDLES_FOR_READ_ENDS_MAP);
        long index = 0L;
        CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
        while (iterator.hasNext()) {
            SAMRecord rec = (SAMRecord)iterator.next();
            if (rec.getReadUnmappedFlag()) {
                if (rec.getReferenceIndex() == -1) {
                    break;
                }
            } else if (!rec.getNotPrimaryAlignmentFlag()) {
                ReadEnds fragmentEnd = this.buildReadEnds(header, index, rec);
                this.fragSort.add(fragmentEnd);
                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName();
                    ReadEnds pairedEnds = tmp.remove(rec.getReferenceIndex(), key);
                    if (pairedEnds == null) {
                        pairedEnds = this.buildReadEnds(header, index, rec);
                        tmp.put(pairedEnds.read2Sequence, key, pairedEnds);
                    } else {
                        int sequence = fragmentEnd.read1Sequence;
                        int coordinate = fragmentEnd.read1Coordinate;
                        if (sequence > pairedEnds.read1Sequence || sequence == pairedEnds.read1Sequence && coordinate >= pairedEnds.read1Coordinate) {
                            pairedEnds.read2Sequence = sequence;
                            pairedEnds.read2Coordinate = coordinate;
                            pairedEnds.read2IndexInFile = index;
                            pairedEnds.orientation = this.getOrientationByte(pairedEnds.orientation == 1, rec.getReadNegativeStrandFlag());
                        } else {
                            pairedEnds.read2Sequence = pairedEnds.read1Sequence;
                            pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
                            pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
                            pairedEnds.read1Sequence = sequence;
                            pairedEnds.read1Coordinate = coordinate;
                            pairedEnds.read1IndexInFile = index;
                            pairedEnds.orientation = this.getOrientationByte(rec.getReadNegativeStrandFlag(), pairedEnds.orientation == 1);
                        }
                        pairedEnds.score = (short)(pairedEnds.score + this.getScore(rec));
                        this.pairSort.add(pairedEnds);
                    }
                }
            }
            if (++index % 1000000L != 0L) continue;
            this.log.info("Read " + index + " records. Tracking " + tmp.size() + " as yet unmatched pairs. " + tmp.sizeInRam() + " records in RAM.  Last sequence index: " + rec.getReferenceIndex());
        }
        this.log.info("Read " + index + " records. " + tmp.size() + " pairs never matched.");
        iterator.close();
        this.pairSort.doneAdding();
        this.fragSort.doneAdding();
    }

    private ReadEnds buildReadEnds(SAMFileHeader header, long index, SAMRecord rec) {
        ReadEnds ends = new ReadEnds();
        ends.read1Sequence = rec.getReferenceIndex();
        ends.read1Coordinate = rec.getReadNegativeStrandFlag() ? rec.getUnclippedEnd() : rec.getUnclippedStart();
        ends.orientation = rec.getReadNegativeStrandFlag() ? (byte)1 : 0;
        ends.read1IndexInFile = index;
        ends.score = this.getScore(rec);
        if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
            ends.read2Sequence = rec.getMateReferenceIndex();
        }
        ends.libraryId = this.getLibraryId(header, rec);
        if (this.addLocationInformation(rec.getReadName(), ends)) {
            ends.readGroup = 0;
            String rg = (String)rec.getAttribute("RG");
            List<SAMReadGroupRecord> readGroups = header.getReadGroups();
            if (rg != null && readGroups != null) {
                for (SAMReadGroupRecord readGroup : readGroups) {
                    if (readGroup.getReadGroupId().equals(rg)) break;
                    ends.readGroup = (short)(ends.readGroup + 1);
                }
            }
        }
        return ends;
    }

    private short getLibraryId(SAMFileHeader header, SAMRecord rec) {
        String library = this.getLibraryName(header, rec);
        Short libraryId = this.libraryIds.get(library);
        if (libraryId == null) {
            short s = this.nextLibraryId;
            this.nextLibraryId = (short)(s + 1);
            libraryId = s;
            this.libraryIds.put(library, libraryId);
        }
        return libraryId;
    }

    private String getLibraryName(SAMFileHeader header, SAMRecord rec) {
        SAMReadGroupRecord rg;
        String readGroupId = (String)rec.getAttribute("RG");
        if (readGroupId != null && (rg = header.getReadGroup(readGroupId)) != null) {
            return rg.getLibrary();
        }
        return "Unknown Library";
    }

    private byte getOrientationByte(boolean read1NegativeStrand, boolean read2NegativeStrand) {
        if (read1NegativeStrand) {
            if (read2NegativeStrand) {
                return 4;
            }
            return 5;
        }
        if (read2NegativeStrand) {
            return 3;
        }
        return 2;
    }

    private short getScore(SAMRecord rec) {
        short score = 0;
        for (byte b : rec.getBaseQualities()) {
            if (b < 15) continue;
            score = (short)(score + b);
        }
        return score;
    }

    private void generateDuplicateIndexes() {
        int maxInMemory = (int)((double)Runtime.getRuntime().maxMemory() * 0.25 / 8.0);
        this.log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
        this.duplicateIndexes = new SortingLongCollection(maxInMemory, this.TMP_DIR.toArray(new File[this.TMP_DIR.size()]));
        ReadEnds firstOfNextChunk = null;
        ArrayList<ReadEnds> nextChunk = new ArrayList<ReadEnds>(200);
        this.log.info("Traversing read pair information and detecting duplicates.");
        for (ReadEnds next : this.pairSort) {
            if (firstOfNextChunk == null) {
                firstOfNextChunk = next;
                nextChunk.add(firstOfNextChunk);
                continue;
            }
            if (this.areComparableForDuplicates(firstOfNextChunk, next, true)) {
                nextChunk.add(next);
                continue;
            }
            if (nextChunk.size() > 1) {
                this.markDuplicatePairs(nextChunk);
            }
            nextChunk.clear();
            nextChunk.add(next);
            firstOfNextChunk = next;
        }
        this.markDuplicatePairs(nextChunk);
        this.pairSort.cleanup();
        this.pairSort = null;
        this.log.info("Traversing fragment information and detecting duplicates.");
        boolean containsPairs = false;
        boolean containsFrags = false;
        for (ReadEnds next : this.fragSort) {
            if (firstOfNextChunk != null && this.areComparableForDuplicates(firstOfNextChunk, next, false)) {
                nextChunk.add(next);
                containsPairs = containsPairs || next.isPaired();
                containsFrags = containsFrags || !next.isPaired();
                continue;
            }
            if (nextChunk.size() > 1 && containsFrags) {
                this.markDuplicateFragments(nextChunk, containsPairs);
            }
            nextChunk.clear();
            nextChunk.add(next);
            firstOfNextChunk = next;
            containsPairs = next.isPaired();
            containsFrags = !next.isPaired();
        }
        this.markDuplicateFragments(nextChunk, containsPairs);
        this.fragSort.cleanup();
        this.fragSort = null;
        this.log.info("Sorting list of duplicate records.");
        this.duplicateIndexes.doneAddingStartIteration();
    }

    private boolean areComparableForDuplicates(ReadEnds lhs, ReadEnds rhs, boolean compareRead2) {
        boolean retval;
        boolean bl = retval = lhs.libraryId == rhs.libraryId && lhs.read1Sequence == rhs.read1Sequence && lhs.read1Coordinate == rhs.read1Coordinate && lhs.orientation == rhs.orientation;
        if (retval && compareRead2) {
            retval = lhs.read2Sequence == rhs.read2Sequence && lhs.read2Coordinate == rhs.read2Coordinate;
        }
        return retval;
    }

    private void addIndexAsDuplicate(long bamIndex) {
        this.duplicateIndexes.add(bamIndex);
        ++this.numDuplicateIndices;
    }

    private void markDuplicatePairs(List<ReadEnds> list) {
        short maxScore = 0;
        ReadEnds best = null;
        for (ReadEnds end : list) {
            if (end.score <= maxScore && best != null) continue;
            maxScore = end.score;
            best = end;
        }
        for (ReadEnds end : list) {
            if (end == best) continue;
            this.addIndexAsDuplicate(end.read1IndexInFile);
            this.addIndexAsDuplicate(end.read2IndexInFile);
        }
        this.trackOpticalDuplicates(list);
    }

    private void trackOpticalDuplicates(List<ReadEnds> list) {
        boolean[] opticalDuplicateFlags = this.findOpticalDuplicates(list, this.OPTICAL_DUPLICATE_PIXEL_DISTANCE);
        int opticalDuplicates = 0;
        for (boolean b : opticalDuplicateFlags) {
            if (!b) continue;
            ++opticalDuplicates;
        }
        if (opticalDuplicates > 0) {
            this.opticalDupesByLibraryId.increment(list.get((int)0).libraryId, opticalDuplicates);
        }
    }

    private void markDuplicateFragments(List<ReadEnds> list, boolean containsPairs) {
        if (containsPairs) {
            for (ReadEnds end : list) {
                if (end.isPaired()) continue;
                this.addIndexAsDuplicate(end.read1IndexInFile);
            }
        } else {
            short maxScore = 0;
            ReadEnds best = null;
            for (ReadEnds end : list) {
                if (end.score <= maxScore && best != null) continue;
                maxScore = end.score;
                best = end;
            }
            for (ReadEnds end : list) {
                if (end == best) continue;
                this.addIndexAsDuplicate(end.read1IndexInFile);
            }
        }
    }

    static class ReadEndsComparator
    implements Comparator<ReadEnds> {
        ReadEndsComparator() {
        }

        @Override
        public int compare(ReadEnds lhs, ReadEnds rhs) {
            int retval = lhs.libraryId - rhs.libraryId;
            if (retval == 0) {
                retval = lhs.read1Sequence - rhs.read1Sequence;
            }
            if (retval == 0) {
                retval = lhs.read1Coordinate - rhs.read1Coordinate;
            }
            if (retval == 0) {
                retval = lhs.orientation - rhs.orientation;
            }
            if (retval == 0) {
                retval = lhs.read2Sequence - rhs.read2Sequence;
            }
            if (retval == 0) {
                retval = lhs.read2Coordinate - rhs.read2Coordinate;
            }
            if (retval == 0) {
                retval = (int)(lhs.read1IndexInFile - rhs.read1IndexInFile);
            }
            if (retval == 0) {
                retval = (int)(lhs.read2IndexInFile - rhs.read2IndexInFile);
            }
            return retval;
        }
    }

    private static final class SamHeaderAndIterator {
        final SAMFileHeader header;
        final CloseableIterator<SAMRecord> iterator;

        private SamHeaderAndIterator(SAMFileHeader header, CloseableIterator<SAMRecord> iterator) {
            this.header = header;
            this.iterator = iterator;
        }
    }
}

