# HG changeset patch # User m-zytnicki # Date 1363252985 14400 # Node ID 1eb55963fe390177112e01e662ef1e10a7722ee5 # Parent 4dded8b1fbc4a306e6c0592a77bf67b15cbb25f5 Updated CompareOverlappingSmall*.py diff -r 4dded8b1fbc4 -r 1eb55963fe39 SMART/Java/Python/CompareOverlappingSmallQuery.py --- a/SMART/Java/Python/CompareOverlappingSmallQuery.py Fri Jan 18 09:01:47 2013 -0500 +++ b/SMART/Java/Python/CompareOverlappingSmallQuery.py Thu Mar 14 05:23:05 2013 -0400 @@ -72,6 +72,11 @@ self.invert = False self.antisense = False self.collinear = False + self.pcOverlapQuery = False + self.pcOverlapRef = False + self.minOverlap = False + self.included = False + self.including = False self.bins = {} self.overlaps = {} self.notOverlapping = False @@ -101,6 +106,17 @@ def setAntisense(self, boolean): self.antisense = boolean + def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef): + self.pcOverlapQuery = pcOverlapQuery + self.pcOverlapRef = pcOverlapRef + + def setMinOverlap(self, minOverlap): + self.minOverlap = minOverlap + + def setInclude(self, included, including): + self.included = included + self.including = including + def includeNotOverlapping(self, boolean): self.notOverlapping = boolean @@ -129,6 +145,18 @@ return False if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection(): return False + if self.included and not refTranscript.include(queryTranscript): + return False + if self.including and not queryTranscript.include(refTranscript): + return False + querySize = queryTranscript.getSize() + if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)): + return False + refSize = refTranscript.getSize() + if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)): + return False + if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap): + return False return True def _alterTranscript(self, transcript, type): @@ -184,9 +212,10 @@ self.writer.close() def displayResults(self): - print "# queries: %d" % (self.nbQueries) - print "# refs: %d" % (self.nbRefs) - print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) + if self.verbosity: + print "# queries: %d" % (self.nbQueries) + print "# refs: %d" % (self.nbRefs) + print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) def run(self): self.loadQuery() @@ -199,17 +228,22 @@ description = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]" parser = OptionParser(description = description) - parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") - parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") - parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") - parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") - parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") + parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") + parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") + parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") + parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") + parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") - parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") + parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") + parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]") + parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]") + parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]") + parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]") + parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]") parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") - parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") + parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") (options, args) = parser.parse_args() cosq = CompareOverlappingSmallQuery(options.verbosity) @@ -220,7 +254,8 @@ cosq.setDistance(options.distance) cosq.setCollinear(options.collinear) cosq.setAntisense(options.antisense) + cosq.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef) + cosq.setMinOverlap(options.minOverlap) + cosq.setInclude(options.included, options.including) cosq.setInvert(options.exclude) cosq.run() - -