comparison SMART/Java/Python/getDistance.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2010
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31 """Get the distance between the transcripts of two lists"""
32
33 import os
34 import sys
35 from optparse import OptionParser
36 from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
37 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
38 from SMART.Java.Python.misc.RPlotter import RPlotter
39 from commons.core.writer.Gff3Writer import Gff3Writer
40
class GetDistance(object):
    """Compute the distances between a query and a reference transcript list.

    The comparison is delegated to a TranscriptListsComparator (self.tlc);
    when an output file name is set, the resulting distribution is drawn
    with an RPlotter.
    """

    def __init__(self, verbosity = 0):
        """Create the comparator and reset every option to its default.

        verbosity -- trace level handed to the helper objects
        """
        self.verbosity = verbosity
        self.writer = None
        # Only created by setPlotter(), i.e. when an output file is set.
        self.plotter = None
        self.spearman = False
        self.tlc = TranscriptListsComparator(None, self.verbosity)
        # (0, ) means "one merged distribution"; setStrands(True) switches
        # to one distribution per strand, keyed -1 and 1.
        self.strands = (0, )
        self.buckets = None
        self.title = ""
        self.xMin = None
        self.xMax = None
        self.proportion = False
        self.outputFileName = None
        self.keep = False

    def __del__(self):
        # Nothing to release; kept so the class interface is unchanged.
        pass

    def setQueryFile(self, fileName, format):
        """Open the query transcripts (file name and transcript format)."""
        self.transcriptContainer1 = TranscriptContainer(fileName, format, self.verbosity)

    def setReferenceFile(self, fileName, format):
        """Open the reference transcripts (file name and transcript format)."""
        self.transcriptContainer2 = TranscriptContainer(fileName, format, self.verbosity)

    def setOutputFile(self, fileName):
        """Set the plot file name; None disables plotting (see plot())."""
        self.outputFileName = fileName

    def setOutputTranscriptFile(self, fileName):
        """Create a GFF3 writer on fileName; a None name leaves the writer unset."""
        if fileName is not None:
            self.writer = Gff3Writer(fileName, self.verbosity)

    def restrictQueryToStart(self, number):
        """Forward the 'restrict to start' size for the query to the comparator."""
        self.tlc.restrictToStart(self.tlc.QUERY, number)

    def restrictReferenceToStart(self, number):
        """Forward the 'restrict to start' size for the reference to the comparator."""
        self.tlc.restrictToStart(self.tlc.REFERENCE, number)

    def restrictQueryToEnd(self, number):
        """Forward the 'restrict to end' size for the query to the comparator."""
        self.tlc.restrictToEnd(self.tlc.QUERY, number)

    def restrictReferenceToEnd(self, number):
        """Forward the 'restrict to end' size for the reference to the comparator."""
        self.tlc.restrictToEnd(self.tlc.REFERENCE, number)

    def setAbsolute(self, boolean):
        """Forward the 'absolute distance' flag to the comparator."""
        self.tlc.setAbsolute(boolean)

    def setProportion(self, boolean):
        """Plot percentages instead of raw counts when boolean is true."""
        self.proportion = boolean

    def setColinear(self, boolean):
        """Forward the 'colinear only' flag to the comparator."""
        # The comparator method is named get...; it is called here as a setter.
        self.tlc.getColinearOnly(boolean)

    def setAntisense(self, boolean):
        """Forward the 'antisense only' flag to the comparator."""
        # The comparator method is named get...; it is called here as a setter.
        self.tlc.getAntisenseOnly(boolean)

    def setDistances(self, minDistance, maxDistance):
        """Forward the minimum and maximum distances to the comparator."""
        self.tlc.setMinDistance(minDistance)
        self.tlc.setMaxDistance(maxDistance)

    def setStrands(self, boolean):
        """Ask for stranded distances; when true, keep one distribution per strand."""
        self.tlc.setStrandedDistance(boolean)
        if boolean:
            self.strands = (-1, 1)

    def setUpstream(self, number):
        """Forward the upstream setting for the reference to the comparator.

        The value is passed through unchanged; the command-line script in
        this file passes the boolean -5 flag here.
        """
        self.tlc.setUpstream(self.tlc.REFERENCE, number)

    def setDownstream(self, number):
        """Forward the downstream setting for the reference to the comparator.

        The value is passed through unchanged; the command-line script in
        this file passes the boolean -3 flag here.
        """
        self.tlc.setDownstream(self.tlc.REFERENCE, number)

    def setBuckets(self, number):
        """Set the bucket size; any non-None value switches the plot to a bar plot."""
        self.buckets = number

    def setTitle(self, title):
        """Set the title given to the plotter."""
        self.title = title

    def setXValues(self, xMin, xMax):
        """Set the minimum and maximum x values given to the plotter."""
        self.xMin, self.xMax = xMin, xMax

    def keepTmpValues(self, boolean):
        """Store the 'keep' flag that is handed to the RPlotter constructor."""
        self.keep = boolean

    def getSpearman(self, boolean):
        """Enable or disable the Spearman's rho report (see printSpearman()).

        Despite its name this is a setter. It used to ignore its argument
        and always enable the report; the flag is now honoured.
        """
        self.spearman = boolean

    def compare(self):
        """Run the comparison and store its result in self.distances."""
        self.tlc.setInputTranscriptContainer(self.tlc.QUERY, self.transcriptContainer1)
        self.tlc.setInputTranscriptContainer(self.tlc.REFERENCE, self.transcriptContainer2)
        self.tlc.setOutputWriter(self.writer)
        self.distances = self.tlc.compareTranscriptListDistance()

    def checkEmptyDistances(self):
        """Return True when no strand in self.strands has any recorded distance."""
        return sum(len(self.distances[strand]) for strand in self.strands) == 0

    def setPlotterMinusStrand(self):
        """Negate the counts of strand -1, when strands are handled separately."""
        if -1 not in self.strands:
            return
        minusStrand = self.distances[-1]
        # Snapshot the keys: values are rewritten in place while looping.
        for distance in list(minusStrand):
            minusStrand[distance] = -minusStrand[distance]

    def setPlotterProportion(self):
        """Convert the counts to percentages of the total, if proportions are on.

        The total is stored in self.nbElements, which setPlotter() prints
        in the y-axis label.
        """
        if not self.proportion:
            return
        # abs(): the counts of strand -1 may already have been negated.
        self.nbElements = sum(abs(sum(self.distances[strand].values())) for strand in self.strands)
        for strand in self.strands:
            self.distances[strand] = dict((distance, float(nb) / self.nbElements * 100) for distance, nb in self.distances[strand].items())

    def setPlotter(self):
        """Create the RPlotter and configure it from the stored options."""
        self.plotter = RPlotter(self.outputFileName, self.verbosity, self.keep)
        if self.buckets is not None:
            self.plotter.setBarplot(True)
        self.plotter.setFill(0)
        self.plotter.setXLabel("distance")
        self.plotter.setYLabel("# elements")
        if self.proportion:
            self.plotter.setYLabel("%% elements (%d in toto)" % (self.nbElements))
        self.plotter.setBuckets(self.buckets)
        self.plotter.setMinimumX(self.xMin)
        self.plotter.setMaximumX(self.xMax)
        self.plotter.setTitle(self.title)

    def plot(self):
        """Post-process the distances and, if an output file is set, plot them.

        Prints "No output." and exits the program when no distance was found.
        """
        if len(self.strands) == 1:
            # Wrap the single distribution so that it can be indexed by
            # strand like the two-strand result.
            self.distances = {0: self.distances}
        if self.checkEmptyDistances():
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No output.")
            sys.exit()
        self.setPlotterMinusStrand()
        self.setPlotterProportion()
        if self.outputFileName is None:
            return
        self.setPlotter()
        for strand in self.strands:
            self.plotter.addLine(self.distances[strand])
        self.plotter.plot()

    def printSpearman(self):
        """Print Spearman's rho, if it was requested with getSpearman().

        The value is read from the plotter, which only exists when an output
        file was set; without one a warning is written to stderr instead of
        failing on the missing plotter.
        """
        if not self.spearman:
            return
        plotter = getattr(self, "plotter", None)
        if plotter is None:
            sys.stderr.write("Warning: Spearman's rho cannot be computed without a plot output file.\n")
            return
        print("Spearman's rho: %.5f" % (plotter.getSpearmanRho()))

    def run(self):
        """Compare the two lists, plot the result and report Spearman's rho."""
        self.compare()
        self.plot()
        self.printSpearman()
185
if __name__ == "__main__":

    # parse command line
    # NOTE: every option carries a "[format: ...]" tag in its help text; the
    # tag of -t/--title said "int" although the option is a string.
    description = "Get Distance v1.0.3: Compute the distance of a set of transcript with respect to a reference set. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="plot output file [format: output file in PNG format]")
    parser.add_option("-O", "--outputDistances", dest="outputDistances", action="store", default=None, type="string", help="output file containing the distance for each element of the query [format: output file in GFF3 format] [default: None]")
    parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="only consider features on the same strand [format: bool] [default: false]")
    parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="only consider features on the opposite strand [format: bool] [default: false]")
    parser.add_option("-b", "--absolute", dest="absolute", action="store_true", default=False, help="give the absolute value of the distance [format: bool] [default: false]")
    parser.add_option("-p", "--proportion", dest="proportion", action="store_true", default=False, help="give the proportion on the y-axis instead of the number of distances [format: bool] [default: false]")
    parser.add_option("-s", "--start1", dest="start1", action="store", default=None, type="int", help="only consider the n first 5' nucleotides for list 1 [format: int]")
    parser.add_option("-S", "--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first 5' nucleotides for list 2 [format: int]")
    parser.add_option("-e", "--end1", dest="end1", action="store", default=None, type="int", help="only consider the n last 3' nucleotides for list 1 [format: int]")
    parser.add_option("-E", "--end2", dest="end2", action="store", default=None, type="int", help="only consider the n last 3' nucleotides for list 2 [format: int]")
    parser.add_option("-m", "--minDistance", dest="minDistance", action="store", default=None, type="int", help="minimum distance considered between two transcripts [format: int] [default: None]")
    parser.add_option("-M", "--maxDistance", dest="maxDistance", action="store", default=1000, type="int", help="maximum distance considered between two transcripts [format: int] [default: 1000]")
    parser.add_option("-5", "--fivePrime", dest="fivePrime", action="store_true", default=False, help="consider the elements from list 1 which are upstream of elements of list 2 [format: bool] [default: False]")
    parser.add_option("-3", "--threePrime", dest="threePrime", action="store_true", default=False, help="consider the elements from list 1 which are downstream of elements of list 2 [format: bool] [default: False]")
    parser.add_option("-u", "--buckets", dest="buckets", action="store", default=None, type="int", help="plot histogram instead of line plot with given interval size [format: int] [default: None]")
    parser.add_option("-2", "--2strands", dest="twoStrands", action="store_true", default=False, help="plot the distributions of each strand separately [format: bool] [default: False]")
    parser.add_option("-r", "--spearman", dest="spearman", action="store_true", default=False, help="compute Spearman rho [format: bool] [default: False]")
    parser.add_option("-x", "--xMin", dest="xMin", action="store", default=None, type="int", help="minimum value on the x-axis to plot [format: int] [default: None]")
    parser.add_option("-X", "--xMax", dest="xMax", action="store", default=None, type="int", help="maximum value on the x-axis to plot [format: int] [default: None]")
    parser.add_option("-t", "--title", dest="title", action="store", default=None, type="string", help="title for the graph [format: string] [default: None]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    parser.add_option("-k", "--keep", dest="keep", action="store_true", default=False, help="keep temporary files [format: bool]")
    (options, args) = parser.parse_args()

    # forward every option to the GetDistance object
    gd = GetDistance(options.verbosity)
    gd.setQueryFile(options.inputFileName1, options.format1)
    gd.setReferenceFile(options.inputFileName2, options.format2)
    gd.setOutputFile(options.outputFileName)
    gd.setOutputTranscriptFile(options.outputDistances)
    gd.setColinear(options.colinear)
    gd.setAntisense(options.antisense)
    gd.setAbsolute(options.absolute)
    gd.setProportion(options.proportion)
    gd.restrictQueryToStart(options.start1)
    gd.restrictReferenceToStart(options.start2)
    gd.restrictQueryToEnd(options.end1)
    gd.restrictReferenceToEnd(options.end2)
    gd.setDistances(options.minDistance, options.maxDistance)
    gd.setUpstream(options.fivePrime)
    gd.setDownstream(options.threePrime)
    gd.setStrands(options.twoStrands)
    gd.setBuckets(options.buckets)
    gd.setTitle(options.title)
    gd.setXValues(options.xMin, options.xMax)
    gd.keepTmpValues(options.keep)
    # -r/--spearman used to be parsed but never forwarded, so it had no
    # effect. The rho is read from the plotter, which is only created when a
    # plot output file is given, so the request is skipped without one.
    if options.spearman:
        if options.outputFileName is None:
            sys.stderr.write("Warning: -r/--spearman is ignored because no plot output file (-o) was given.\n")
        else:
            gd.getSpearman(True)
    gd.run()