comparison smart_toolShed/SMART/Java/Python/compareOverlapping.py @ 0:e0f8dcca02ed

Uploaded S-MART tool: a toolbox for managing RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e0f8dcca02ed
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2010
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31 """Compare overlap of two transcript lists"""
32 import sys
33 import os
34 from optparse import OptionParser
35 from SMART.Java.Python.misc import Utils
36 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
37 from commons.core.writer.TranscriptWriter import TranscriptWriter
38 from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
39 from SMART.Java.Python.misc.RPlotter import RPlotter
40 from commons.core.writer.Gff3Writer import Gff3Writer
41
class CompareOverlapping(object):
    """Command-line driver comparing the overlap of two transcript lists.

    setAttributesFromCmdLine() parses the options into self._options;
    run() then hands them to a TranscriptListsComparator, which reads the
    two input files through TranscriptContainer objects and emits its
    result through a TranscriptWriter created with the "gff3" format.
    """

    def __init__(self):
        # Populated by setAttributesFromCmdLine(); run() reads it.
        self._options = None

    def setAttributesFromCmdLine(self, args=None):
        """Parse the command-line options and store them in self._options.

        args -- optional list of argument strings to parse instead of the
                process command line; when None (the default), optparse
                falls back to sys.argv[1:].
        """
        description = "Compare Overlapping v1.0.3: Get the data which overlap with a reference set. [Category: Data Comparison]"

        parser = OptionParser(description = description)
        parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
        parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")
        parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
        parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")
        parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]")
        parser.add_option("-S", "--start1", dest="start1", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")
        parser.add_option("-s", "--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")
        parser.add_option("-U", "--end1", dest="end1", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")
        parser.add_option("-u", "--end2", dest="end2", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")
        parser.add_option("-t", "--intron", dest="introns", action="store_true", default=False, help="also report introns [format: bool] [default: false]")
        parser.add_option("-E", "--5primeExtension1", dest="fivePrime1", action="store", default=None, type="int", help="extension towards 5' in file 1 [format: int]")
        parser.add_option("-e", "--5primeExtension2", dest="fivePrime2", action="store", default=None, type="int", help="extension towards 5' in file 2 [format: int]")
        parser.add_option("-N", "--3primeExtension1", dest="threePrime1", action="store", default=None, type="int", help="extension towards 3' in file 1 [format: int]")
        parser.add_option("-n", "--3primeExtension2", dest="threePrime2", action="store", default=None, type="int", help="extension towards 3' in file 2 [format: int]")
        parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="colinear only [format: bool] [default: false]")
        parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="antisense only [format: bool] [default: false]")
        parser.add_option("-d", "--distance", dest="distance", action="store", default=None, type="int", help="accept some distance between query and reference [format: int]")
        parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")
        parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")
        parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=1, type="int", help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")
        parser.add_option("-p", "--pcOverlap", dest="pcOverlap", action="store", default=None, type="int", help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")
        parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
        parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
        parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
        parser.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")
        # Positional leftovers are not used by this tool.
        self._options, _ = parser.parse_args(args)

    def _buildComparator(self, logHandle):
        """Create the comparator and forward every parsed option to it.

        logHandle -- open file object for the log, or None when no log was
                     requested; passed straight to TranscriptListsComparator.

        Returns the configured TranscriptListsComparator, with its two input
        containers and its output writer already attached.
        """
        options = self._options

        # File 1 plays the QUERY role and file 2 the REFERENCE role below.
        transcriptContainer1 = TranscriptContainer(options.inputFileName1, options.format1, options.verbosity)
        transcriptContainer2 = TranscriptContainer(options.inputFileName2, options.format2, options.verbosity)
        writer = TranscriptWriter(options.output, "gff3", options.verbosity)

        comparator = TranscriptListsComparator(logHandle, options.verbosity)
        comparator.restrictToStart(comparator.QUERY, options.start1)
        comparator.restrictToStart(comparator.REFERENCE, options.start2)
        comparator.restrictToEnd(comparator.QUERY, options.end1)
        comparator.restrictToEnd(comparator.REFERENCE, options.end2)
        comparator.extendFivePrime(comparator.QUERY, options.fivePrime1)
        comparator.extendFivePrime(comparator.REFERENCE, options.fivePrime2)
        comparator.extendThreePrime(comparator.QUERY, options.threePrime1)
        comparator.extendThreePrime(comparator.REFERENCE, options.threePrime2)
        # The single --intron flag applies to both input files.
        comparator.acceptIntrons(comparator.QUERY, options.introns)
        comparator.acceptIntrons(comparator.REFERENCE, options.introns)
        comparator.getAntisenseOnly(options.antisense)
        comparator.getColinearOnly(options.colinear)
        comparator.getInvert(options.exclude)
        comparator.setMaxDistance(options.distance)
        comparator.setMinOverlap(options.minOverlap)
        comparator.setPcOverlap(options.pcOverlap)
        comparator.setIncludedOnly(options.included)
        comparator.setIncludingOnly(options.including)
        comparator.includeNotOverlapping(options.notOverlapping)
        # Odds are always requested; run() reads them back via getOdds().
        comparator.computeOdds(True)
        comparator.setInputTranscriptContainer(comparator.QUERY, transcriptContainer1)
        comparator.setInputTranscriptContainer(comparator.REFERENCE, transcriptContainer2)
        comparator.setOutputWriter(writer)
        return comparator

    def run(self):
        """Run the comparison described by self._options.

        Opens the log file when --log was given, performs the comparison,
        and, unless --exclude is set, prints the statistics computed by
        Utils.getMinAvgMedMax() from the comparator's odds when verbosity
        is positive and the odds are non-empty.
        """
        options = self._options

        logHandle = None
        if options.log:
            # NOTE(review): the log is opened on the same path string that is
            # given to TranscriptWriter; whether the writer appends an
            # extension is not visible here -- confirm the two files do not
            # clobber each other.
            logHandle = open(options.output, "w")

        try:
            comparator = self._buildComparator(logHandle)
            comparator.compareTranscriptList()
        finally:
            # Release the log file even when the comparison raises.
            if logHandle is not None:
                logHandle.close()

        if not options.exclude:
            odds = comparator.getOdds()
            if options.verbosity > 0 and odds:
                # Single-argument call form: same output on Python 2 and 3.
                print("min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(odds))
122
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the comparison.
    tool = CompareOverlapping()
    tool.setAttributesFromCmdLine()
    tool.run()