Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/getSizes.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
42 from commons.core.utils.RepetOptionParser import RepetOptionParser | 42 from commons.core.utils.RepetOptionParser import RepetOptionParser |
43 | 43 |
44 LOG_DEPTH = "smart" | 44 LOG_DEPTH = "smart" |
45 | 45 |
46 class GetSizes(object): | 46 class GetSizes(object): |
47 | 47 |
48 def __init__(self, inFileName = None, inFormat=None, outFileName = None, query=None,xMax=None, xMin=None, csv=False, verbosity = 0): | 48 def __init__(self, inFileName = None, inFormat=None, outFileName = None, query=None,xMax=None, xMin=None, verbosity = 0): |
49 self.inFileName = inFileName | 49 self.inFileName = inFileName |
50 self.inFormat= inFormat | 50 self.inFormat= inFormat |
51 self.outFileName = outFileName | 51 self.outFileName = outFileName |
52 self.query = query | 52 self.query = query |
53 self.xMax = xMax | 53 self.xMax = xMax |
54 self.xMin = xMin | 54 self.xMin = xMin |
55 self.xLab = "Size" | 55 self.xLab = "Size" |
56 self.yLab = "# reads" | 56 self.yLab = "# reads" |
57 self.barplot = False | 57 self.barplot = False |
58 self.csv = csv | 58 self._verbosity = verbosity |
59 self._verbosity = verbosity | 59 self.parser = None |
60 self.parser = None | 60 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) |
61 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) | 61 |
62 | 62 def setAttributesFromCmdLine(self): |
63 def setAttributesFromCmdLine(self): | 63 description = "Usage: getSizes.py [options]\n\nGet Sizes v1.0.2: Get the sizes of a set of genomic coordinates. [Category: Visualization]\n" |
64 description = "Usage: getSizes.py [options]\n\nGet Sizes v1.0.2: Get the sizes of a set of genomic coordinates. [Category: Visualization]\n" | 64 epilog = "" |
65 epilog = "" | 65 parser = RepetOptionParser(description = description, epilog = epilog) |
66 parser = RepetOptionParser(description = description, epilog = epilog) | 66 parser.add_option("-i", "--input", dest="inputFileName", action="store", default=None, type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]") |
67 parser.add_option("-i", "--input", dest="inputFileName", action="store", default=None, type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]") | 67 parser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the input [compulsory] [format: transcript or sequence file format]") |
68 parser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the input [compulsory] [format: transcript or sequence file format]") | 68 parser.add_option("-q", "--query", dest="query", action="store", default=None, type="string", help="type to mesure [default: size] [format: choice (size, intron size, exon size, 1st exon size)]") |
69 parser.add_option("-q", "--query", dest="query", action="store", default=None, type="string", help="type to mesure [default: size] [format: choice (size, intron size, exon size, 1st exon size)]") | 69 parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in PNG format]") |
70 parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in PNG format]") | 70 parser.add_option("-x", "--xMax", dest="xMax", action="store", default=None, type="int", help="maximum value on the x-axis to plot [format: int]") |
71 parser.add_option("-x", "--xMax", dest="xMax", action="store", default=None, type="int", help="maximum value on the x-axis to plot [format: int]") | 71 parser.add_option("-X", "--xMin", dest="xMin", action="store", default=None, type="int", help="minimum value on the x-axis to plot [format: int]") |
72 parser.add_option("-X", "--xMin", dest="xMin", action="store", default=None, type="int", help="minimum value on the x-axis to plot [format: int]") | 72 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") |
73 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") | 73 parser.add_option("-a", "--xLabel", dest="xLab", action="store", default="Size", type="string", help="x absis label name [format: string] [default: Size]") |
74 parser.add_option("-c", "--csv", dest="csv", action="store", type="string", help="write a .csv file [format: bool] [default: false]") | 74 parser.add_option("-b", "--yLabel", dest="yLab", action="store", default="# reads", type="string", help="y absis label name [format: string] [default: Reads]") |
75 parser.add_option("-a", "--xLabel", dest="xLab", action="store", default="Size", type="string", help="x absis label name [format: string] [default: Size]") | 75 parser.add_option("-B", "--barplot", dest="barplot", action="store_true", default=False, help="use barplot representation [format: bool] [default: false]") |
76 parser.add_option("-b", "--yLabel", dest="yLab", action="store", default="# reads", type="string", help="y absis label name [format: string] [default: Reads]") | 76 options = parser.parse_args()[0] |
77 parser.add_option("-B", "--barplot", dest="barplot", action="store_true", default=False, help="use barplot representation [format: bool] [default: false]") | 77 self._setAttributesFromOptions(options) |
78 options = parser.parse_args()[0] | 78 |
79 self._setAttributesFromOptions(options) | 79 def _setAttributesFromOptions(self, options): |
80 | 80 self.setInFileName(options.inputFileName) |
81 def _setAttributesFromOptions(self, options): | 81 self.setInFormat(options.format) |
82 self.setInFileName(options.inputFileName) | 82 self.setQuery(options.query) |
83 self.setInFormat(options.format) | 83 self.setOutFileName(options.outputFileName) |
84 self.setQuery(options.query) | 84 self.setXMax(options.xMax) |
85 self.setOutFileName(options.outputFileName) | 85 self.setXMin(options.xMin) |
86 self.setXMax(options.xMax) | 86 self.setxLab(options.xLab) |
87 self.setXMin(options.xMin) | 87 self.setyLab(options.yLab) |
88 self.setxLab(options.xLab) | 88 self.setBarplot(options.barplot) |
89 self.setyLab(options.yLab) | 89 self.setVerbosity(options.verbosity) |
90 self.setBarplot(options.barplot) | 90 |
91 self.setVerbosity(options.verbosity) | 91 def setInFileName(self, inputFileName): |
92 | 92 self.inFileName = inputFileName |
93 def setInFileName(self, inputFileName): | 93 |
94 self.inFileName = inputFileName | 94 def setInFormat(self, inFormat): |
95 | 95 self.inFormat = inFormat |
96 def setInFormat(self, inFormat): | 96 |
97 self.inFormat = inFormat | 97 def setQuery(self, query): |
98 | 98 self.query = query |
99 def setQuery(self, query): | 99 |
100 self.query = query | 100 def setOutFileName(self, outFileName): |
101 | 101 self.outFileName = outFileName |
102 def setOutFileName(self, outFileName): | 102 |
103 self.outFileName = outFileName | 103 def setXMax(self, xMax): |
104 | 104 self.xMax = xMax |
105 def setXMax(self, xMax): | 105 |
106 self.xMax = xMax | 106 def setXMin(self, xMin): |
107 | 107 self.xMin = xMin |
108 def setXMin(self, xMin): | 108 |
109 self.xMin = xMin | 109 def setxLab(self, xLab): |
110 | 110 self.xLab = xLab |
111 def setxLab(self, xLab): | 111 |
112 self.xLab = xLab | 112 def setyLab(self, yLab): |
113 | 113 self.yLab = yLab |
114 def setyLab(self, yLab): | 114 |
115 self.yLab = yLab | 115 def setBarplot(self, barplot): |
116 | 116 self.barplot = barplot |
117 def setBarplot(self, barplot): | 117 |
118 self.barplot = barplot | 118 def setVerbosity(self, verbosity): |
119 | 119 self._verbosity = verbosity |
120 def setCsv(self, csv): | 120 |
121 self.csv = csv | 121 def _checkOptions(self): |
122 | 122 if self.inFileName == None: |
123 def setVerbosity(self, verbosity): | 123 self._logAndRaise("ERROR: Missing input file name") |
124 self._verbosity = verbosity | 124 if self.inFormat == "fasta": |
125 | 125 self.parser = FastaParser(self.inFileName, self._verbosity) |
126 def _checkOptions(self): | 126 elif self.inFormat == "fastq": |
127 if self.inFileName == None: | 127 self.parser = FastqParser(self.inFileName, self._verbosity) |
128 self._logAndRaise("ERROR: Missing input file name") | 128 else: |
129 if self.inFormat == "fasta": | 129 self.parser = TranscriptContainer(self.inFileName, self.inFormat, self._verbosity) |
130 self.parser = FastaParser(self.inFileName, self._verbosity) | 130 |
131 elif self.inFormat == "fastq": | 131 def _logAndRaise(self, errorMsg): |
132 self.parser = FastqParser(self.inFileName, self._verbosity) | 132 self._log.error(errorMsg) |
133 else: | 133 raise Exception(errorMsg) |
134 self.parser = TranscriptContainer(self.inFileName, self.inFormat, self._verbosity) | 134 |
135 | 135 def run(self): |
136 def _logAndRaise(self, errorMsg): | 136 LoggerFactory.setLevel(self._log, self._verbosity) |
137 self._log.error(errorMsg) | 137 self._checkOptions() |
138 raise Exception(errorMsg) | 138 self._log.info("START getsizes") |
139 | 139 self._log.debug("Input file name: %s" % self.inFileName) |
140 def run(self): | 140 |
141 LoggerFactory.setLevel(self._log, self._verbosity) | 141 nbItems = self.parser.getNbItems() |
142 self._checkOptions() | 142 self._log.info( "%i items found" % (nbItems)) |
143 self._log.info("START getsizes") | 143 |
144 self._log.debug("Input file name: %s" % self.inFileName) | 144 # treat items |
145 | 145 progress = Progress(nbItems, "Analyzing sequences of %s" % (self.inFileName), self._verbosity) |
146 nbItems = self.parser.getNbItems() | 146 sizes = {} |
147 self._log.info( "%i items found" % (nbItems)) | 147 minimum = 1000000000000 |
148 | 148 maximum = 0 |
149 # treat items | 149 sum = 0 |
150 progress = Progress(nbItems, "Analyzing sequences of %s" % (self.inFileName), self._verbosity) | 150 number = 0 |
151 sizes = {} | 151 nbSubItems = 0 |
152 names = {} | 152 for item in self.parser.getIterator(): |
153 minimum = 1000000000000 | 153 items = [] |
154 maximum = 0 | 154 if self.query == "exon": |
155 sum = 0 | 155 items = item.getExons() |
156 number = 0 | 156 elif self.query == "exon1": |
157 nbSubItems = 0 | 157 if len(item.getExons()) > 1: |
158 for item in self.parser.getIterator(): | 158 item.sortExons() |
159 items = [] | 159 items = [item.getExons()[0]] |
160 if self.query == "exon": | 160 elif self.query == "intron": |
161 items = item.getExons() | 161 items = item.getIntrons() |
162 elif self.query == "exon1": | 162 else: |
163 if len(item.getExons()) > 1: | 163 items = [item, ] |
164 item.sortExons() | 164 |
165 items = [item.getExons()[0]] | 165 for thisItem in items: |
166 elif self.query == "intron": | 166 try: |
167 items = item.getIntrons() | 167 nbElements = int(float(thisItem.getTagValue("nbElements"))) |
168 else: | 168 if nbElements == None: |
169 items = [item, ] | 169 nbElements = 1 |
170 | 170 except: |
171 for thisItem in items: | 171 nbElements = 1 |
172 try: | 172 size = thisItem.getSize() |
173 nbElements = int(float(thisItem.getTagValue("nbElements"))) | 173 minimum = min(minimum, size) |
174 if nbElements == None: | 174 maximum = max(maximum, size) |
175 nbElements = 1 | 175 |
176 except: | 176 if size not in sizes: |
177 nbElements = 1 | 177 sizes[size] = nbElements |
178 size = thisItem.getSize() | 178 else: |
179 minimum = min(minimum, size) | 179 sizes[size] += nbElements |
180 maximum = max(maximum, size) | 180 sum += size |
181 name = thisItem.name.split()[0] | 181 nbSubItems += nbElements |
182 | 182 number += 1 |
183 if size not in sizes: | 183 progress.inc() |
184 sizes[size] = nbElements | 184 progress.done() |
185 if self.csv: | 185 |
186 names[size] = [name, ] | 186 if self.outFileName != None: |
187 else: | 187 plotter = RPlotter(self.outFileName, self._verbosity) |
188 sizes[size] += nbElements | 188 plotter.setFill(0) |
189 if self.csv: | 189 plotter.setMinimumX(self.xMin) |
190 names[size].append(name) | 190 plotter.setMaximumX(self.xMax) |
191 sum += size | 191 plotter.setXLabel(self.xLab) |
192 nbSubItems += nbElements | 192 plotter.setYLabel(self.yLab) |
193 number += 1 | 193 plotter.setBarplot(self.barplot) |
194 progress.inc() | 194 plotter.addLine(sizes) |
195 progress.done() | 195 plotter.plot() |
196 | 196 |
197 if self.outFileName != None: | 197 if nbSubItems == 0: |
198 plotter = RPlotter(self.outFileName, self._verbosity) | 198 self._logAndRaise("No item found") |
199 plotter.setFill(0) | 199 |
200 plotter.setMinimumX(self.xMin) | 200 self.items = number |
201 plotter.setMaximumX(self.xMax) | 201 self.subItems = nbSubItems |
202 plotter.setXLabel(self.xLab) | 202 self.nucleotides = sum |
203 plotter.setYLabel(self.yLab) | 203 self.minAvgMedMax = Utils.getMinAvgMedMax(sizes) |
204 plotter.setBarplot(self.barplot) | 204 |
205 plotter.addLine(sizes) | 205 print "%d items" % (number) |
206 plotter.plot() | 206 print "%d sub-items" % (nbSubItems) |
207 | 207 print "%d nucleotides" % (sum) |
208 if nbSubItems == 0: | 208 print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(sizes) |
209 self._logAndRaise("No item found") | 209 |
210 | 210 self._log.info("END getsizes") |
211 if self.csv: | |
212 csvHandle = open(self.csv, "w") | |
213 for size in range(min(sizes.keys()), max(sizes.keys())+1): | |
214 if size not in sizes: | |
215 csvHandle.write("%d,0,\n" % (size)) | |
216 else: | |
217 csvHandle.write("%d,%d,%s\n" % (size, sizes[size], ";".join(names[size]))) | |
218 csvHandle.close() | |
219 | |
220 self.items = number | |
221 self.subItems = nbSubItems | |
222 self.nucleotides = sum | |
223 self.minAvgMedMax = Utils.getMinAvgMedMax(sizes) | |
224 | |
225 print "%d items" % (number) | |
226 print "%d sub-items" % (nbSubItems) | |
227 print "%d nucleotides" % (sum) | |
228 print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(sizes) | |
229 | |
230 self._log.info("END getsizes") | |
231 | 211 |
232 | 212 |
233 if __name__ == "__main__": | 213 if __name__ == "__main__": |
234 iGetSizes = GetSizes() | 214 iGetSizes = GetSizes() |
235 iGetSizes.setAttributesFromCmdLine() | 215 iGetSizes.setAttributesFromCmdLine() |
236 iGetSizes.run() | 216 iGetSizes.run() |
237 | 217 |
238 #TODO: add two more options!!!!!! | 218 #TODO: add two more options!!!!!! |