6
|
1 #! /usr/bin/env python
|
|
2 #
|
|
3 # Copyright INRA-URGI 2009-2010
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30 #
|
|
31 """
|
|
32 Read a mapping file (many formats supported) and select some of them
|
|
33 Mappings should be sorted by read names
|
|
34 """
|
|
35 import os, random, shelve
|
|
36 from optparse import OptionParser, OptionGroup
|
|
37 from commons.core.parsing.ParserChooser import ParserChooser
|
|
38 from commons.core.parsing.FastaParser import FastaParser
|
|
39 from commons.core.parsing.FastqParser import FastqParser
|
|
40 from commons.core.parsing.GffParser import GffParser
|
|
41 from commons.core.writer.BedWriter import BedWriter
|
|
42 from commons.core.writer.UcscWriter import UcscWriter
|
|
43 from commons.core.writer.GbWriter import GbWriter
|
|
44 from commons.core.writer.Gff2Writer import Gff2Writer
|
|
45 from commons.core.writer.Gff3Writer import Gff3Writer
|
|
46 from commons.core.writer.FastaWriter import FastaWriter
|
|
47 from commons.core.writer.FastqWriter import FastqWriter
|
|
48 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
|
|
49 from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
|
|
50 from SMART.Java.Python.mySql.MySqlTable import MySqlTable
|
|
51 from SMART.Java.Python.misc.RPlotter import RPlotter
|
|
52 from SMART.Java.Python.misc.Progress import Progress
|
|
53 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
|
|
54
|
|
55
|
|
# Introns shorter than this (nt) may be merged away by Mapping.mergeExons()
distanceExons = 20
# Mappings whose shortest exon is smaller than this (nt) can be rejected
exonSize = 20
|
|
58
|
|
59
|
|
60 class MapperAnalyzer(object):
|
|
61 """
|
|
62 Analyse the output of a parser
|
|
63 """
|
|
64
|
|
65 def __init__(self, verbosity = 0):
|
|
66 self.verbosity = verbosity
|
|
67 self.mySqlConnection = MySqlConnection(verbosity)
|
|
68 self.tooShort = 0
|
|
69 self.tooManyMismatches = 0
|
|
70 self.tooManyGaps = 0
|
|
71 self.tooShortExons = 0
|
|
72 self.tooManyMappings = 0
|
|
73 self.nbMappings = 0
|
|
74 self.nbSequences = 0
|
|
75 self.nbAlreadyMapped = 0
|
|
76 self.nbAlreadyMappedSequences = 0
|
|
77 self.nbWrittenMappings = 0
|
|
78 self.nbWrittenSequences = 0
|
|
79 self.parser = None
|
|
80 self.logHandle = None
|
|
81 self.randomNumber = random.randint(0, 100000)
|
|
82 self.gff3Writer = None
|
|
83 self.alreadyMappedReader = None
|
|
84 self.unmatchedWriter = None
|
|
85 self.sequenceListParser = None
|
|
86 self.sequences = None
|
|
87 self.alreadyMapped = None
|
|
88 self.mappedNamesTable = None
|
|
89 self.minSize = None
|
|
90 self.minId = None
|
|
91 self.maxMismatches = None
|
|
92 self.maxGaps = None
|
|
93 self.maxMappings = None
|
|
94 self.merge = False
|
|
95 self.checkExons = False
|
|
96 self.suffix = None
|
|
97 self.tmpDirectory = "%s%s" % (os.environ["SMARTMPPATH"], os.sep) if "SMARTMPPATH" in os.environ else ""
|
|
98
|
|
99
|
|
100 def __del__(self):
|
|
101 if self.sequences != None:
|
|
102 self.sequences.close()
|
|
103 if self.alreadyMapped != None:
|
|
104 self.alreadyMapped.close()
|
|
105 if self.mappedNamesTable != None:
|
|
106 self.mappedNamesTable.remove()
|
|
107 if self.gff3Writer != None:
|
|
108 self.gff3Writer.close()
|
|
109
|
|
110 if self.logHandle != None:
|
|
111 self.logHandle.close()
|
|
112
|
|
113
|
|
114 def setMappingFile(self, fileName, format):
|
|
115 parserChooser = ParserChooser(self.verbosity)
|
|
116 parserChooser.findFormat(format, "mapping")
|
|
117 self.parser = parserChooser.getParser(fileName)
|
|
118
|
|
119
|
|
120 def setSequenceFile(self, fileName, format):
|
|
121 if format == "fasta":
|
|
122 self.sequenceListParser = FastaParser(fileName, self.verbosity)
|
|
123 elif format == "fastq":
|
|
124 self.sequenceListParser = FastqParser(fileName, self.verbosity)
|
|
125 else:
|
|
126 raise Exception("Do not understand sequence format %s" % (format))
|
|
127
|
|
128
|
|
129 def setOutputFile(self, fileName, title):
|
|
130 self.gff3Writer = Gff3Writer(fileName, self.verbosity)
|
|
131 self.gff3Writer.setTitle(title)
|
|
132
|
|
133
|
|
134 def setAlreadyMatched(self, fileName):
|
|
135 self.alreadyMappedReader = GffParser(fileName, self.verbosity)
|
|
136
|
|
137
|
|
138 def setRemainingFile(self, fileName, format):
|
|
139 if format == "fasta":
|
|
140 self.unmatchedWriter = FastaWriter("%s_unmatched.fasta" % (fileName), self.verbosity)
|
|
141 elif format == "fastq":
|
|
142 self.unmatchedWriter = FastqWriter("%s_unmatched.fastq" % (fileName), self.verbosity)
|
|
143 else:
|
|
144 raise Exception("Do not understand %s format." % (format))
|
|
145 self.mappedNamesTable = MySqlTable(self.mySqlConnection, "mappedNames_%d" % (self.randomNumber), self.verbosity)
|
|
146 self.mappedNamesTable.create(["name"], {"name": "char"}, {"name": 50})
|
|
147 self.mappedNamesTable.createIndex("iNameMapped", ["name", ], True)
|
|
148
|
|
149
|
|
150 def setLog(self, fileName):
|
|
151 self.logHandle = open(fileName, "w")
|
|
152
|
|
153
|
|
154 def setMinSize(self, size):
|
|
155 self.minSize = size
|
|
156
|
|
157
|
|
158 def setMinId(self, id):
|
|
159 self.minId = id
|
|
160
|
|
161
|
|
162 def setMaxMismatches(self, mismatches):
|
|
163 self.maxMismatches = mismatches
|
|
164
|
|
165
|
|
166 def setMaxGaps(self, gaps):
|
|
167 self.maxGaps = gaps
|
|
168
|
|
169
|
|
170 def setMaxMappings(self, mappings):
|
|
171 self.maxMappings = mappings
|
|
172
|
|
173
|
|
174 def mergeExons(self, b):
|
|
175 self.merge = b
|
|
176
|
|
177
|
|
178 def acceptShortExons(self, b):
|
|
179 self.checkExons = not b
|
|
180
|
|
181
|
|
182 def countMappings(self):
|
|
183 self.nbMappings = self.parser.getNbMappings()
|
|
184 if self.verbosity > 0:
|
|
185 print "%i matches found" % (self.nbMappings)
|
|
186
|
|
187
|
|
188 def storeAlreadyMapped(self):
|
|
189 self.alreadyMapped = shelve.open("%stmpAlreadyMapped_%d" % (self.tmpDirectory, self.randomNumber))
|
|
190 progress = Progress(self.alreadyMappedReader.getNbTranscripts(), "Reading already mapped reads", self.verbosity)
|
|
191 self.nbAlreadyMappedSequences = 0
|
|
192 for transcript in self.alreadyMappedReader.getIterator():
|
|
193 if not self.alreadyMapped.has_key(transcript.getName()):
|
|
194 self.alreadyMapped[transcript.getName()] = 1
|
|
195 self.nbAlreadyMappedSequences += 1
|
|
196 progress.inc()
|
|
197 progress.done()
|
|
198 self.nbAlreadyMapped = self.alreadyMappedReader.getNbTranscripts()
|
|
199
|
|
200
|
|
201 def storeSequences(self):
|
|
202 self.sequences = shelve.open("%stmpSequences_%d" % (self.tmpDirectory, self.randomNumber))
|
|
203 progress = Progress(self.sequenceListParser.getNbSequences(), "Reading sequences", self.verbosity)
|
|
204 for sequence in self.sequenceListParser.getIterator():
|
|
205 self.sequences[sequence.getName().split(" ")[0]] = len(sequence.getSequence())
|
|
206 self.nbSequences += 1
|
|
207 progress.inc()
|
|
208 progress.done()
|
|
209 if self.verbosity > 0:
|
|
210 print "%i sequences read" % (self.nbSequences)
|
|
211
|
|
212
|
|
213 def checkOrder(self):
|
|
214 names = shelve.open("%stmpNames_%d" % (self.tmpDirectory, self.randomNumber))
|
|
215 previousName = None
|
|
216 progress = Progress(self.nbMappings, "Checking mapping file", self.verbosity)
|
|
217 for mapping in self.parser.getIterator():
|
|
218 name = mapping.queryInterval.getName()
|
|
219 if name != previousName and previousName != None:
|
|
220 if names.has_key(previousName):
|
|
221 raise Exception("Error! Input mapping file is not ordered! (Name '%s' occurs at least twice)" % (previousName))
|
|
222 names[previousName] = 1
|
|
223 previousName = name
|
|
224 progress.inc()
|
|
225 progress.done()
|
|
226 names.close()
|
|
227
|
|
228
|
|
229 def checkPreviouslyMapped(self, name):
|
|
230 if self.alreadyMappedReader == None:
|
|
231 return False
|
|
232 return self.alreadyMapped.has_key(name)
|
|
233
|
|
234
|
|
235 def findOriginalSize(self, name):
|
|
236 alternate = "%s/1" % (name)
|
|
237 if (self.suffix == None) or (not self.suffix):
|
|
238 if self.sequences.has_key(name):
|
|
239 self.suffix = False
|
|
240 return self.sequences[name]
|
|
241 if self.suffix == None:
|
|
242 self.suffix = True
|
|
243 else:
|
|
244 raise Exception("Cannot find name %n" % (name))
|
|
245 if (self.suffix):
|
|
246 if self.sequences.has_key(alternate):
|
|
247 return self.sequences[alternate]
|
|
248 raise Exception("Cannot find name %s" % (name))
|
|
249
|
|
250
|
|
251 def checkErrors(self, mapping):
|
|
252 accepted = True
|
|
253 # short size
|
|
254 if self.minSize != None and mapping.size * 100 < self.minSize * mapping.queryInterval.size:
|
|
255 self.tooShort += 1
|
|
256 accepted = False
|
|
257 if self.logHandle != None:
|
|
258 self.logHandle.write("size of mapping %s is too short (%i instead of %i)\n" % (str(mapping), mapping.queryInterval.size, mapping.size))
|
|
259 # low identity
|
|
260 if self.minId != None and mapping.getTagValue("identity") < self.minId:
|
|
261 self.tooManyMismatches += 1
|
|
262 accepted = False
|
|
263 if self.logHandle != None:
|
|
264 self.logHandle.write("mapping %s has a low identity rate\n" % (str(mapping)))
|
|
265 # too many mismatches
|
|
266 if self.maxMismatches != None and mapping.getTagValue("nbMismatches") > self.maxMismatches:
|
|
267 self.tooManyMismatches += 1
|
|
268 accepted = False
|
|
269 if self.logHandle != None:
|
|
270 self.logHandle.write("mapping %s has more mismatches than %i\n" % (str(mapping), self.maxMismatches))
|
|
271 # too many gaps
|
|
272 if self.maxGaps != None and mapping.getTagValue("nbGaps") > self.maxGaps:
|
|
273 self.tooManyGaps += 1
|
|
274 accepted = False
|
|
275 if self.logHandle != None:
|
|
276 self.logHandle.write("mapping %s has more gaps than %i\n" % (str(mapping), self.maxGaps))
|
|
277 # short exons
|
|
278 if self.checkExons and len(mapping.subMappings) > 1 and min([subMapping.targetInterval.getSize() for subMapping in mapping.subMappings]) < exonSize:
|
|
279 self.tooShortExons += 1
|
|
280 accepted = False
|
|
281 if self.logHandle != None:
|
|
282 self.logHandle.write("sequence %s maps as too short exons\n" % (mapping))
|
|
283 return accepted
|
|
284
|
|
285
|
|
286 def checkNbMappings(self, mappings):
|
|
287 nbOccurrences = 0
|
|
288 for mapping in mappings:
|
|
289 nbOccurrences += 1 if "nbOccurrences" not in mapping.getTagNames() else mapping.getTagValue("nbOccurrences")
|
|
290 if (self.maxMappings != None and nbOccurrences > self.maxMappings):
|
|
291 self.tooManyMappings += 1
|
|
292 if self.logHandle != None:
|
|
293 self.logHandle.write("sequence %s maps %i times\n" % (mappings[0].queryInterval.getName(), nbOccurrences))
|
|
294 return False
|
|
295 return (nbOccurrences > 0)
|
|
296
|
|
297
|
|
298 def sortMappings(self, mappings):
|
|
299 nbOccurrences = 0
|
|
300 for mapping in mappings:
|
|
301 nbOccurrences += 1 if "nbOccurrences" not in mapping.getTagNames() else mapping.getTagValue("nbOccurrences")
|
|
302
|
|
303 orderedMappings = sorted(mappings, key = lambda mapping: mapping.getErrorScore())
|
|
304 cpt = 1
|
|
305 rank = 1
|
|
306 previousMapping = None
|
|
307 previousScore = None
|
|
308 wasLastTie = False
|
|
309 rankedMappings = []
|
|
310 bestRegion = "%s:%d-%d" % (orderedMappings[0].targetInterval.getChromosome(), orderedMappings[0].targetInterval.getStart(), orderedMappings[0].targetInterval.getEnd())
|
|
311 for mapping in orderedMappings:
|
|
312 mapping.setNbOccurrences(nbOccurrences)
|
|
313 mapping.setOccurrence(cpt)
|
|
314
|
|
315 score = mapping.getErrorScore()
|
|
316 if previousScore != None and previousScore == score:
|
|
317 if "Rank" in previousMapping.getTagNames():
|
|
318 if not wasLastTie:
|
|
319 previousMapping.setRank("%sTie" % (rank))
|
|
320 mapping.setRank("%sTie" % (rank))
|
|
321 wasLastTie = True
|
|
322 else:
|
|
323 rank = cpt
|
|
324 mapping.setRank(rank)
|
|
325 wasLastTie = False
|
|
326 if cpt != 1:
|
|
327 mapping.setBestRegion(bestRegion)
|
|
328
|
|
329 rankedMappings.append(mapping)
|
|
330 previousMapping = mapping
|
|
331 previousScore = score
|
|
332 cpt += 1
|
|
333 return rankedMappings
|
|
334
|
|
335
|
|
336 def processMappings(self, mappings):
|
|
337 if not mappings:
|
|
338 return
|
|
339 selectedMappings = []
|
|
340 name = mappings[0].queryInterval.getName()
|
|
341 size = self.findOriginalSize(name)
|
|
342 for mapping in mappings:
|
|
343 if self.merge:
|
|
344 mapping.mergeExons(distanceExons)
|
|
345 mapping.queryInterval.size = size
|
|
346 if self.checkErrors(mapping):
|
|
347 selectedMappings.append(mapping)
|
|
348
|
|
349 if self.checkNbMappings(selectedMappings):
|
|
350 if self.unmatchedWriter != None:
|
|
351 query = self.mySqlConnection.executeQuery("INSERT INTO %s (name) VALUES ('%s')" % (self.mappedNamesTable.name, name if not self.suffix else "%s/1" % (name)))
|
|
352 self.nbWrittenSequences += 1
|
|
353 mappings = self.sortMappings(selectedMappings)
|
|
354 for mapping in mappings:
|
|
355 self.nbWrittenMappings += 1
|
|
356 self.gff3Writer.addTranscript(mapping.getTranscript())
|
|
357
|
|
358
|
|
359 def readMappings(self):
|
|
360 previousQueryName = None
|
|
361 mappings = []
|
|
362 self.parser.reset()
|
|
363 progress = Progress(self.nbMappings, "Reading mappings", self.verbosity)
|
|
364 for mapping in self.parser.getIterator():
|
|
365 queryName = mapping.queryInterval.getName().split(" ")[0]
|
|
366 if self.checkPreviouslyMapped(queryName):
|
|
367 if self.logHandle != None:
|
|
368 self.logHandle.write("Mapping %s has already been mapped.\n" % (queryName))
|
|
369 else:
|
|
370 if previousQueryName == queryName:
|
|
371 mappings.append(mapping)
|
|
372 else:
|
|
373 if previousQueryName != None:
|
|
374 self.processMappings(mappings)
|
|
375 previousQueryName = queryName
|
|
376 mappings = [mapping, ]
|
|
377 progress.inc()
|
|
378 self.processMappings(mappings)
|
|
379 self.gff3Writer.write()
|
|
380 self.gff3Writer.close()
|
|
381 progress.done()
|
|
382
|
|
383
|
|
384 def writeUnmatched(self):
|
|
385 progress = Progress(self.nbSequences, "Reading unmatched sequences", self.verbosity)
|
|
386 for sequence in self.sequenceListParser.getIterator():
|
|
387 name = sequence.getName().split(" ")[0]
|
|
388 query = self.mySqlConnection.executeQuery("SELECT * FROM %s WHERE name = '%s' LIMIT 1" % (self.mappedNamesTable.name, name))
|
|
389 if query.isEmpty():
|
|
390 self.unmatchedWriter.addSequence(sequence)
|
|
391 progress.inc()
|
|
392 progress.done()
|
|
393
|
|
394
|
|
395 def analyze(self):
|
|
396 self.countMappings()
|
|
397 self.checkOrder()
|
|
398 self.storeSequences()
|
|
399 if self.alreadyMappedReader != None:
|
|
400 self.storeAlreadyMapped()
|
|
401 self.readMappings()
|
|
402 if self.unmatchedWriter != None:
|
|
403 self.writeUnmatched()
|
|
404
|
|
405
|
|
406
|
|
407
|
|
if __name__ == "__main__":

    # parse command line
    description = "Mapper Analyzer v1.0.1: Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. [Category: Conversion]"

    parser = OptionParser(description = description)
    compGroup = OptionGroup(parser, "Compulsory options")
    filtGroup = OptionGroup(parser, "Filtering options")
    tranGroup = OptionGroup(parser, "Transformation options")
    outpGroup = OptionGroup(parser, "Output options")
    otheGroup = OptionGroup(parser, "Other options")
    compGroup.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file (output of the tool) [compulsory] [format: file in mapping format given by -f]")
    compGroup.add_option("-f", "--format", dest="format", action="store", default="seqmap", type="string", help="format of the file [compulsory] [format: mapping file format]")
    compGroup.add_option("-q", "--sequences", dest="sequencesFileName", action="store", type="string", help="file of the sequences [compulsory] [format: file in sequence format given by -k]")
    compGroup.add_option("-k", "--seqFormat", dest="sequenceFormat", action="store", default="fasta", type="string", help="format of the sequences: fasta or fastq [default: fasta] [format: sequence file format]")
    compGroup.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    filtGroup.add_option("-n", "--number", dest="number", action="store", default=None, type="int", help="max. number of occurrences of a sequence [format: int]")
    filtGroup.add_option("-s", "--size", dest="size", action="store", default=None, type="int", help="minimum pourcentage of size [format: int]")
    filtGroup.add_option("-d", "--identity", dest="identity", action="store", default=None, type="int", help="minimum pourcentage of identity [format: int]")
    filtGroup.add_option("-m", "--mismatch", dest="mismatch", action="store", default=None, type="int", help="maximum number of mismatches [format: int]")
    filtGroup.add_option("-p", "--gap", dest="gap", action="store", default=None, type="int", help="maximum number of gaps [format: int]")
    tranGroup.add_option("-e", "--mergeExons", dest="mergeExons", action="store_true", default=False, help="merge exons when introns are short [format: bool] [default: false]")
    tranGroup.add_option("-x", "--removeExons", dest="removeExons", action="store_true", default=False, help="remove transcripts when exons are short [format: bool] [default: false]")
    outpGroup.add_option("-t", "--title", dest="title", action="store", default="SMART", type="string", help="title of the UCSC track [format: string] [default: SMART]")
    outpGroup.add_option("-r", "--remaining", dest="remaining", action="store_true", default=False, help="print the unmatched sequences [format: bool] [default: false]")
    otheGroup.add_option("-a", "--append", dest="appendFileName", action="store", default=None, type="string", help="append to GFF3 file [format: file in GFF3 format]")
    otheGroup.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")
    otheGroup.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")
    parser.add_option_group(compGroup)
    parser.add_option_group(filtGroup)
    parser.add_option_group(tranGroup)
    parser.add_option_group(outpGroup)
    parser.add_option_group(otheGroup)
    (options, args) = parser.parse_args()

    # configure the analyzer from the command-line options
    # (only the options explicitly given on the command line are forwarded)
    analyzer = MapperAnalyzer(options.verbosity)
    analyzer.setMappingFile(options.inputFileName, options.format)
    analyzer.setSequenceFile(options.sequencesFileName, options.sequenceFormat)
    analyzer.setOutputFile(options.outputFileName, options.title)
    if options.appendFileName != None:
        analyzer.setAlreadyMatched(options.appendFileName)
    if options.remaining:
        analyzer.setRemainingFile(options.outputFileName, options.sequenceFormat)
    if options.number != None:
        analyzer.setMaxMappings(options.number)
    if options.size != None:
        analyzer.setMinSize(options.size)
    if options.identity != None:
        analyzer.setMinId(options.identity)
    if options.mismatch != None:
        analyzer.setMaxMismatches(options.mismatch)
    if options.gap != None:
        analyzer.setMaxGaps(options.gap)
    if options.mergeExons:
        analyzer.mergeExons(True)
    if options.removeExons:
        analyzer.acceptShortExons(False)
    if options.log:
        analyzer.setLog("%s.log" % (options.outputFileName))
    analyzer.analyze()

    # print the filtering statistics
    # NOTE(review): these ratios divide by nbSequences/nbMappings and would
    # raise ZeroDivisionError on empty inputs — presumably inputs are assumed
    # non-empty; confirm before hardening
    if options.verbosity > 0:
        print "kept %i sequences over %s (%f%%)" % (analyzer.nbWrittenSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)
        if options.appendFileName != None:
            print "kept %i sequences over %s (%f%%) including already mapped sequences" % (analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)
        print "kept %i mappings over %i (%f%%)" % (analyzer.nbWrittenMappings, analyzer.nbMappings, float(analyzer.nbWrittenMappings) / analyzer.nbMappings * 100)
        if options.appendFileName != None:
            print "kept %i mappings over %i (%f%%) including already mapped" % (analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, analyzer.nbMappings, float(analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped) / analyzer.nbMappings * 100)
        print "removed %i too short mappings (%f%%)" % (analyzer.tooShort, float(analyzer.tooShort) / analyzer.nbMappings * 100)
        print "removed %i mappings with too many mismatches (%f%%)" % (analyzer.tooManyMismatches, float(analyzer.tooManyMismatches) / analyzer.nbMappings * 100)
        print "removed %i mappings with too many gaps (%f%%)" % (analyzer.tooManyGaps, float(analyzer.tooManyGaps) / analyzer.nbMappings * 100)
        print "removed %i mappings with too short exons (%f%%)" % (analyzer.tooShortExons, float(analyzer.tooShortExons) / analyzer.nbMappings * 100)
        print "removed %i sequences with too many hits (%f%%)" % (analyzer.tooManyMappings, float(analyzer.tooManyMappings) / analyzer.nbSequences * 100)
        print "%i sequences have no mapping (%f%%)" % (analyzer.nbSequences - analyzer.nbWrittenSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)
        if options.appendFileName != None:
            print "%i sequences have no mapping (%f%%) excluding already mapped sequences" % (analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)
|
|
485
|
|
486
|