Mercurial > repos > yufei-luo > s_mart
view SMART/Java/Python/ComputeCoverage.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 44d5973c188c |
children | 169d364ddd91 |
line wrap: on
line source
#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2011
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import os
import random
from optparse import OptionParser, OptionGroup
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from SMART.Java.Python.misc.Progress import Progress
from commons.core.writer.Gff3Writer import Gff3Writer


class CoverageComputer(object):
    """Compute how much of a query set is covered by a reference set.

    Every position spanned by a reference exon is recorded per chromosome.
    The query transcripts are then scanned to count how many of their exon
    positions were recorded, globally (printed by sumUp) and, when an output
    file is set, per transcript (stored in a "coverage" tag, in percent).
    """

    def __init__(self, verbosity = 0):
        """Create a computer with no input, no output and empty counters."""
        self.verbosity       = verbosity
        self.queryReader     = None
        self.referenceReader = None
        self.outputWriter    = None
        self.introns         = False
        self.nbNucleotides   = 0
        self.nbCovered       = 0
        # chromosome -> {position: 1}; created here so that readQuery() and
        # write() do not fail with AttributeError when called before
        # readReference().
        self.coveredRegions  = {}

    def setInputQueryFile(self, fileName, format):
        """Set the query file and the transcript format used to parse it."""
        self.queryReader = TranscriptContainer(fileName, format, self.verbosity-1)

    def setInputReferenceFile(self, fileName, format):
        """Set the reference file and the transcript format used to parse it."""
        self.referenceReader = TranscriptContainer(fileName, format, self.verbosity-1)

    def includeIntrons(self, boolean):
        """Choose whether removeExons() is called on each transcript first.

        NOTE(review): presumably removeExons() collapses a transcript into
        one block so that intronic positions are counted too -- confirm
        against the Transcript class.
        """
        self.introns = boolean

    def setOutputFileName(self, fileName, title="S-MART", feature="transcript", featurePart="exon"):
        """Create the GFF3 writer that receives the annotated query transcripts."""
        self.outputWriter = Gff3Writer(fileName, self.verbosity-1)
        self.outputWriter.setTitle(title)
        self.outputWriter.setFeature(feature)
        self.outputWriter.setFeaturePart(featurePart)

    def _countCoverage(self, transcript):
        """Return (number of exon positions, number of covered positions).

        Both exon bounds are inclusive. A chromosome that never appeared in
        the reference has no covered position.
        """
        covered     = self.coveredRegions.get(transcript.getChromosome(), {})
        nbPositions = 0
        nbCovered   = 0
        for exon in transcript.getExons():
            for position in range(exon.getStart(), exon.getEnd() + 1):
                nbPositions += 1
                nbCovered   += covered.get(position, 0)
        return nbPositions, nbCovered

    def readReference(self):
        """Record every position spanned by a reference exon, per chromosome."""
        self.coveredRegions = {}
        progress = Progress(self.referenceReader.getNbTranscripts(), "Reading reference file", self.verbosity-1)
        for transcript in self.referenceReader.getIterator():
            covered = self.coveredRegions.setdefault(transcript.getChromosome(), {})
            if self.introns:
                transcript.removeExons()
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd() + 1):
                    covered[position] = 1
            progress.inc()
        progress.done()

    def readQuery(self):
        """Add the query positions and covered positions to the global counters."""
        progress = Progress(self.queryReader.getNbTranscripts(), "Reading query file", self.verbosity-1)
        for transcript in self.queryReader.getIterator():
            progress.inc()
            # NOTE(review): transcripts on a chromosome absent from the
            # reference are skipped, so their nucleotides are not part of
            # the total printed by sumUp() -- kept as in the original.
            if transcript.getChromosome() not in self.coveredRegions:
                continue
            if self.introns:
                transcript.removeExons()
            nbPositions, nbCovered = self._countCoverage(transcript)
            self.nbNucleotides += nbPositions
            self.nbCovered     += nbCovered
        progress.done()

    def write(self):
        """Write each query transcript with a "coverage" tag (in percent).

        A transcript on a chromosome absent from the reference gets a
        coverage of 0 instead of raising KeyError.
        """
        progress = Progress(self.queryReader.getNbTranscripts(), "Writing output file", self.verbosity-1)
        for transcript in self.queryReader.getIterator():
            if self.introns:
                transcript.removeExons()
            size = transcript.getSize()
            nbPositions, coverage = self._countCoverage(transcript)
            transcript.setTagValue("coverage", 0 if size == 0 else float(coverage) / size * 100)
            self.outputWriter.addTranscript(transcript)
            progress.inc()
        progress.done()

    def sumUp(self):
        """Print the global nucleotide counts and the covered percentage."""
        if self.nbNucleotides == 0:
            percentage = 0
        else:
            percentage = float(self.nbCovered) / self.nbNucleotides * 100
        # A parenthesised single argument prints the same text with the
        # Python 2 statement and the Python 3 function.
        print("%d nucleotides in query, %d (%.f%%) covered" % (self.nbNucleotides, self.nbCovered, percentage))

    def run(self):
        """Read both inputs, write the output if one was set, print the summary."""
        self.readReference()
        self.readQuery()
        if self.outputWriter is not None:
            self.write()
        self.sumUp()


if __name__ == "__main__":

    # parse command line
    description = "Compute Coverage v1.0.1: Compute the coverage of a set with respect to another set. [Category: Personal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1",    dest="inputFileName1", action="store",                     type="string", help="input query file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1",   dest="format1",        action="store",                     type="string", help="format of the first file [compulsory] [format: transcript file format]")
    # The format of the reference file is given by -g (the help used to say -f).
    parser.add_option("-j", "--input2",    dest="inputFileName2", action="store",                     type="string", help="input reference file [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2",   dest="format2",        action="store",                     type="string", help="format of the second file [compulsory] [format: transcript file format]")
    parser.add_option("-t", "--introns",   dest="introns",        action="store_true", default=False,                help="also include introns [format: boolean] [default: false]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",      default=None,  type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
    (options, args) = parser.parse_args()

    # Report a missing compulsory option with a usage message rather than
    # letting the readers fail later on a None file name or format.
    for flag, value in (("-i", options.inputFileName1), ("-f", options.format1), ("-j", options.inputFileName2), ("-g", options.format2)):
        if value is None:
            parser.error("option %s is compulsory" % flag)

    computer = CoverageComputer(options.verbosity)
    computer.setInputQueryFile(options.inputFileName1, options.format1)
    computer.setInputReferenceFile(options.inputFileName2, options.format2)
    computer.includeIntrons(options.introns)
    # The output file is optional: only create the writer when it is given,
    # so that run() skips the writing step otherwise.
    if options.outputFileName is not None:
        computer.setOutputFileName(options.outputFileName)
    computer.run()