annotate SMART/Java/Python/mapperAnalyzer.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #! /usr/bin/env python
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # Copyright INRA-URGI 2009-2010
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 Read a mapping file (many formats supported) and select some of them
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 Mappings should be sorted by read names
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 import os, random, shelve
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from optparse import OptionParser, OptionGroup
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.parsing.ParserChooser import ParserChooser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from commons.core.parsing.FastaParser import FastaParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from commons.core.parsing.FastqParser import FastqParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40 from commons.core.parsing.GffParser import GffParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41 from commons.core.writer.BedWriter import BedWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
42 from commons.core.writer.UcscWriter import UcscWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
43 from commons.core.writer.GbWriter import GbWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
44 from commons.core.writer.Gff2Writer import Gff2Writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
45 from commons.core.writer.Gff3Writer import Gff3Writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
46 from commons.core.writer.FastaWriter import FastaWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
47 from commons.core.writer.FastqWriter import FastqWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
48 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
49 from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
50 from SMART.Java.Python.mySql.MySqlTable import MySqlTable
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
51 from SMART.Java.Python.misc.RPlotter import RPlotter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
52 from SMART.Java.Python.misc.Progress import Progress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
53 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
54
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
55
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
# Exon distance threshold; it is not used in the visible part of this file
# -- TODO confirm its unit and where it is consumed.
distanceExons = 20
# Minimal accepted exon size: checkErrors() rejects a multi-part mapping when
# one of its parts is smaller than this value (only if exon checking is on).
exonSize = 20
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
58
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
59
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
60 class MapperAnalyzer(object):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
61 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
62 Analyse the output of a parser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
63 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
64
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def __init__(self, verbosity = 0):
    """
    Build an analyzer with no file attached and every filter disabled
    @param verbosity: verbosity level, also handed to the MySQL connection
    """
    self.verbosity = verbosity
    self.mySqlConnection = MySqlConnection(verbosity)

    # rejection counters, one per filter
    self.tooShort = self.tooManyMismatches = self.tooManyGaps = 0
    self.tooShortExons = self.tooManyMappings = 0

    # global counters
    self.nbMappings = self.nbSequences = 0
    self.nbAlreadyMapped = self.nbAlreadyMappedSequences = 0
    self.nbWrittenMappings = self.nbWrittenSequences = 0

    # random tag used to name the temporary shelves and the MySQL table
    self.randomNumber = random.randint(0, 100000)

    # readers, writers and temporary storages (set by the set*/store* methods)
    self.parser = None
    self.logHandle = None
    self.gff3Writer = None
    self.alreadyMappedReader = None
    self.unmatchedWriter = None
    self.sequenceListParser = None
    self.sequences = None
    self.alreadyMapped = None
    self.mappedNamesTable = None

    # filter thresholds (None means "filter disabled")
    self.minSize = None
    self.minId = None
    self.maxMismatches = None
    self.maxGaps = None
    self.maxMappings = None

    self.merge = False
    self.checkExons = False
    # None until findOriginalSize() discovers whether read names carry "/1"
    self.suffix = None

    # temporary files go to $SMARTMPPATH when it is defined, else to the cwd
    if "SMARTMPPATH" in os.environ:
        self.tmpDirectory = "%s%s" % (os.environ["SMARTMPPATH"], os.sep)
    else:
        self.tmpDirectory = ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
98
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
99
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def __del__(self):
    """
    Release every resource opened by this object: close the shelves, the
    GFF3 writer and the log file, and remove the temporary MySQL table.

    Attributes are fetched with getattr() because __del__ is also called on
    objects whose __init__ failed before the attributes were created.
    """
    sequences = getattr(self, "sequences", None)
    if sequences is not None:
        sequences.close()
    alreadyMapped = getattr(self, "alreadyMapped", None)
    if alreadyMapped is not None:
        alreadyMapped.close()
    mappedNamesTable = getattr(self, "mappedNamesTable", None)
    if mappedNamesTable is not None:
        mappedNamesTable.remove()
    gff3Writer = getattr(self, "gff3Writer", None)
    if gff3Writer is not None:
        gff3Writer.close()
    logHandle = getattr(self, "logHandle", None)
    if logHandle is not None:
        logHandle.close()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
112
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
113
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMappingFile(self, fileName, format):
    """
    Select the parser used to read the mapping file
    @param fileName: name of the mapping file
    @param format:   format of the mapping file, resolved by ParserChooser
    """
    chooser = ParserChooser(self.verbosity)
    chooser.findFormat(format, "mapping")
    mappingParser = chooser.getParser(fileName)
    self.parser = mappingParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
118
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setSequenceFile(self, fileName, format):
    """
    Select the parser used to read the file of the reads
    @param fileName: name of the sequence file
    @param format:   "fasta" or "fastq"
    @raise Exception: if the format is neither "fasta" nor "fastq"
    """
    if format not in ("fasta", "fastq"):
        raise Exception("Do not understand sequence format %s" % (format))
    if format == "fasta":
        sequenceParser = FastaParser(fileName, self.verbosity)
    else:
        sequenceParser = FastqParser(fileName, self.verbosity)
    self.sequenceListParser = sequenceParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
127
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
128
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setOutputFile(self, fileName, title):
    """
    Create the GFF3 writer which receives the selected mappings
    @param fileName: name of the output file
    @param title:    title given to the writer
    """
    writer = Gff3Writer(fileName, self.verbosity)
    self.gff3Writer = writer
    writer.setTitle(title)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
132
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
133
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setAlreadyMatched(self, fileName):
    """
    Give a GFF file of reads that have already been mapped
    @param fileName: name of the GFF file
    """
    gffReader = GffParser(fileName, self.verbosity)
    self.alreadyMappedReader = gffReader
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
136
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
137
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setRemainingFile(self, fileName, format):
    """
    Create the writer for the reads that are not matched, together with the
    MySQL table which stores the names of the mapped reads
    @param fileName: prefix of the output file ("_unmatched.<format>" is appended)
    @param format:   "fasta" or "fastq"
    @raise Exception: if the format is neither "fasta" nor "fastq"
    """
    if format not in ("fasta", "fastq"):
        raise Exception("Do not understand %s format." % (format))
    if format == "fasta":
        self.unmatchedWriter = FastaWriter("%s_unmatched.fasta" % (fileName), self.verbosity)
    else:
        self.unmatchedWriter = FastqWriter("%s_unmatched.fastq" % (fileName), self.verbosity)
    tableName = "mappedNames_%d" % (self.randomNumber)
    # the attribute is set before the table is created, so that __del__ can
    # still remove the table if the creation stops half-way
    self.mappedNamesTable = MySqlTable(self.mySqlConnection, tableName, self.verbosity)
    self.mappedNamesTable.create(["name"], {"name": "char"}, {"name": 50})
    self.mappedNamesTable.createIndex("iNameMapped", ["name", ], True)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
148
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
149
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setLog(self, fileName):
    """
    Open (and truncate) the file where rejected mappings are reported
    @param fileName: name of the log file
    """
    handle = open(fileName, "w")
    self.logHandle = handle
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
152
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
153
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMinSize(self, size):
    """
    Set the minimum size of a mapping, as a percentage of the size of the read
    @param size: the minimum percentage
    """
    setattr(self, "minSize", size)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
156
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
157
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMinId(self, id):
    """
    Set the minimum value of the "identity" tag of an accepted mapping
    @param id: the minimum identity
    """
    setattr(self, "minId", id)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
160
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
161
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMaxMismatches(self, mismatches):
    """
    Set the maximum value of the "nbMismatches" tag of an accepted mapping
    @param mismatches: the maximum number of mismatches
    """
    setattr(self, "maxMismatches", mismatches)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
164
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
165
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMaxGaps(self, gaps):
    """
    Set the maximum value of the "nbGaps" tag of an accepted mapping
    @param gaps: the maximum number of gaps
    """
    setattr(self, "maxGaps", gaps)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
168
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
169
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMaxMappings(self, mappings):
    """
    Set the maximum number of occurrences a read may have to be kept
    @param mappings: the maximum number of mappings per read
    """
    setattr(self, "maxMappings", mappings)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
172
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
173
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergeExons(self, b):
    """
    Store the "merge exons" switch
    @param b: value of the switch
    """
    setattr(self, "merge", b)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
176
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
177
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def acceptShortExons(self, b):
    """
    Tell whether mappings with short exons are accepted; exon sizes are
    checked only when they are not
    @param b: True to accept short exons
    """
    if b:
        self.checkExons = False
    else:
        self.checkExons = True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
180
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
181
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def countMappings(self):
    """
    Ask the mapping parser for the number of mappings, store it in
    self.nbMappings and report it when verbosity is positive
    """
    self.nbMappings = self.parser.getNbMappings()
    if self.verbosity > 0:
        # single-argument parenthesised form: same output on Python 2 and 3
        print("%i matches found" % (self.nbMappings))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
186
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
187
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def storeAlreadyMapped(self):
    """
    Read the file of already mapped reads and store the distinct names in a
    temporary shelf (self.alreadyMapped).

    Sets self.nbAlreadyMappedSequences (number of distinct names) and
    self.nbAlreadyMapped (number of transcripts reported by the reader).
    """
    self.alreadyMapped = shelve.open("%stmpAlreadyMapped_%d" % (self.tmpDirectory, self.randomNumber))
    progress = Progress(self.alreadyMappedReader.getNbTranscripts(), "Reading already mapped reads", self.verbosity)
    self.nbAlreadyMappedSequences = 0
    for transcript in self.alreadyMappedReader.getIterator():
        name = transcript.getName()
        # "in" replaces has_key(), which no longer exists in Python 3
        if name not in self.alreadyMapped:
            self.alreadyMapped[name] = 1
            self.nbAlreadyMappedSequences += 1
        progress.inc()
    progress.done()
    self.nbAlreadyMapped = self.alreadyMappedReader.getNbTranscripts()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
199
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
200
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def storeSequences(self):
    """
    Read the sequence file and store the size of each read in a temporary
    shelf (self.sequences), keyed by the read name cut at the first space.

    Increments self.nbSequences once per sequence read.
    """
    self.sequences = shelve.open("%stmpSequences_%d" % (self.tmpDirectory, self.randomNumber))
    progress = Progress(self.sequenceListParser.getNbSequences(), "Reading sequences", self.verbosity)
    for sequence in self.sequenceListParser.getIterator():
        shortName = sequence.getName().split(" ")[0]
        self.sequences[shortName] = len(sequence.getSequence())
        self.nbSequences += 1
        progress.inc()
    progress.done()
    if self.verbosity > 0:
        # single-argument parenthesised form: same output on Python 2 and 3
        print("%i sequences read" % (self.nbSequences))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
211
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
212
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def checkOrder(self):
    """
    Check that the mappings of a same read are contiguous in the mapping file
    @raise Exception: if a read name appears in two separate runs of mappings
    """
    names = shelve.open("%stmpNames_%d" % (self.tmpDirectory, self.randomNumber))
    try:
        previousName = None
        progress = Progress(self.nbMappings, "Checking mapping file", self.verbosity)
        for mapping in self.parser.getIterator():
            name = mapping.queryInterval.getName()
            # a run of mappings has just ended: its name must be a new one
            if name != previousName and previousName is not None:
                if previousName in names:
                    raise Exception("Error! Input mapping file is not ordered! (Name '%s' occurs at least twice)" % (previousName))
                names[previousName] = 1
            previousName = name
            progress.inc()
        progress.done()
        # the last run is never followed by a name change, so it has to be
        # checked here (otherwise an "A, B, A" file would be accepted)
        if previousName is not None and previousName in names:
            raise Exception("Error! Input mapping file is not ordered! (Name '%s' occurs at least twice)" % (previousName))
    finally:
        # close the shelf even when the file is rejected
        names.close()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
227
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
228
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def checkPreviouslyMapped(self, name):
    """
    Tell whether a read is in the set of already mapped reads
    @param name: name of the read
    @return:     False if no "already mapped" file was given, else whether
                 the name is a key of self.alreadyMapped
    """
    if self.alreadyMappedReader is None:
        return False
    # "in" replaces has_key(), which no longer exists in Python 3
    return name in self.alreadyMapped
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
233
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
234
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def findOriginalSize(self, name):
    """
    Find the size of a read from its name, as stored in self.sequences

    The name is searched as is, or followed by "/1". The first successful
    or failed plain lookup fixes self.suffix (False: plain names, True:
    names with "/1"), and later calls only try the selected form.
    @param name: name of the read
    @return:     the size stored for this read
    @raise Exception: if the read cannot be found
    """
    alternate = "%s/1" % (name)
    if not self.suffix:
        # suffix is None (not decided yet) or False (plain names)
        if name in self.sequences:
            self.suffix = False
            return self.sequences[name]
        if self.suffix is None:
            self.suffix = True
        else:
            raise Exception("Cannot find name %s" % (name))
    if self.suffix:
        if alternate in self.sequences:
            return self.sequences[alternate]
    raise Exception("Cannot find name %s" % (name))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
249
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
250
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def checkErrors(self, mapping):
    """
    Apply every enabled filter to a mapping

    All the filters are evaluated (no early exit), so that each rejection
    counter is updated and each reason is written to the log, if any.
    @param mapping: the mapping to check
    @return:        True if the mapping passes all the filters
    """
    accepted = True
    log = self.logHandle
    # short size: the mapping covers less than minSize % of the read
    if self.minSize is not None and mapping.size * 100 < self.minSize * mapping.queryInterval.size:
        self.tooShort += 1
        accepted = False
        if log is not None:
            # observed mapping size first, then the read size it is compared to
            log.write("size of mapping %s is too short (%i instead of %i)\n" % (str(mapping), mapping.size, mapping.queryInterval.size))
    # low identity (counted with the mismatches: there is no dedicated counter)
    if self.minId is not None and mapping.getTagValue("identity") < self.minId:
        self.tooManyMismatches += 1
        accepted = False
        if log is not None:
            log.write("mapping %s has a low identity rate\n" % (str(mapping)))
    # too many mismatches
    if self.maxMismatches is not None and mapping.getTagValue("nbMismatches") > self.maxMismatches:
        self.tooManyMismatches += 1
        accepted = False
        if log is not None:
            log.write("mapping %s has more mismatches than %i\n" % (str(mapping), self.maxMismatches))
    # too many gaps
    if self.maxGaps is not None and mapping.getTagValue("nbGaps") > self.maxGaps:
        self.tooManyGaps += 1
        accepted = False
        if log is not None:
            log.write("mapping %s has more gaps than %i\n" % (str(mapping), self.maxGaps))
    # short exons: only mappings made of several parts are concerned
    if self.checkExons and len(mapping.subMappings) > 1 and min([subMapping.targetInterval.getSize() for subMapping in mapping.subMappings]) < exonSize:
        self.tooShortExons += 1
        accepted = False
        if log is not None:
            log.write("sequence %s maps as too short exons\n" % (mapping))
    return accepted
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
284
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
285
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def checkNbMappings(self, mappings):
    """
    Tell whether a read maps an acceptable number of times.

    A mapping contributes the value of its "nbOccurrences" tag when it has
    one, and 1 otherwise.

    @param mappings: the mappings of one read to be counted
    @return: False when the total exceeds self.maxMappings (the read is then
             counted in self.tooManyMappings and, if a log is open, logged);
             otherwise True if and only if the total is positive
    """
    total = 0
    for candidate in mappings:
        if "nbOccurrences" in candidate.getTagNames():
            total += candidate.getTagValue("nbOccurrences")
        else:
            total += 1

    overLimit = self.maxMappings is not None and total > self.maxMappings
    if not overLimit:
        # An empty list yields 0 occurrences and is therefore rejected here.
        return total > 0

    self.tooManyMappings += 1
    if self.logHandle is not None:
        self.logHandle.write("sequence %s maps %i times\n" % (mappings[0].queryInterval.getName(), total))
    return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
296
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
297
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def sortMappings(self, mappings):
    """
    Order the mappings of one read by increasing error score and tag them.

    Every returned mapping receives:
      - the total number of occurrences (sum of the "nbOccurrences" tags,
        counting 1 for a mapping without that tag),
      - its 1-based position in the sorted list (setOccurrence),
      - a rank: the position of the first mapping having that score, or the
        string "<rank>Tie" when several consecutive mappings share a score,
      - the best region ("chromosome:start-end" of the first sorted mapping),
        for every mapping except the first one.

    @param mappings: the mappings of one read
    @return: the same mapping objects, sorted; an empty list if none is given
    """
    if not mappings:
        # Without this guard, orderedMappings[0] below raises IndexError.
        return []

    nbOccurrences = 0
    for mapping in mappings:
        nbOccurrences += 1 if "nbOccurrences" not in mapping.getTagNames() else mapping.getTagValue("nbOccurrences")

    orderedMappings = sorted(mappings, key = lambda mapping: mapping.getErrorScore())
    best = orderedMappings[0].targetInterval
    bestRegion = "%s:%d-%d" % (best.getChromosome(), best.getStart(), best.getEnd())

    rank = 1
    previousMapping = None
    previousScore = None
    wasLastTie = False
    rankedMappings = []
    # cpt is the 1-based position in the sorted list.
    for cpt, mapping in enumerate(orderedMappings, 1):
        mapping.setNbOccurrences(nbOccurrences)
        mapping.setOccurrence(cpt)

        score = mapping.getErrorScore()
        if previousScore is not None and previousScore == score:
            # NOTE(review): the nesting of this branch was rebuilt from a
            # flattened listing; it assumes both setRank calls sit under
            # the "Rank" test -- confirm against the repository.
            if "Rank" in previousMapping.getTagNames():
                if not wasLastTie:
                    # First tie of a run: relabel the mapping that opened it.
                    previousMapping.setRank("%sTie" % (rank))
                mapping.setRank("%sTie" % (rank))
                wasLastTie = True
        else:
            rank = cpt
            mapping.setRank(rank)
            wasLastTie = False
        if cpt != 1:
            mapping.setBestRegion(bestRegion)

        rankedMappings.append(mapping)
        previousMapping = mapping
        previousScore = score
    return rankedMappings
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
334
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
335
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def processMappings(self, mappings):
    """
    Filter, rank and write all the mappings of one read.

    Each mapping is optionally exon-merged (when self.merge is set), given
    the original size of the read, and kept only if self.checkErrors
    accepts it.  If self.checkNbMappings accepts the kept mappings, the read
    is counted as written, optionally recorded in the mapped-names table
    (only when an unmatched writer is configured), and its mappings are
    sorted and handed to the GFF3 writer.

    @param mappings: the mappings sharing one query name; nothing is done
                     when the list is empty
    """
    if not mappings:
        return

    name = mappings[0].queryInterval.getName()
    size = self.findOriginalSize(name)
    selectedMappings = []
    for mapping in mappings:
        if self.merge:
            mapping.mergeExons(distanceExons)
        mapping.queryInterval.size = size
        if self.checkErrors(mapping):
            selectedMappings.append(mapping)

    if not self.checkNbMappings(selectedMappings):
        return

    if self.unmatchedWriter is not None:
        storedName = "%s/1" % (name) if self.suffix else name
        # NOTE(review): the statement is built by string interpolation; a
        # read name containing a quote would break it -- consider the
        # connection's parameter binding if it offers one.
        self.mySqlConnection.executeQuery("INSERT INTO %s (name) VALUES ('%s')" % (self.mappedNamesTable.name, storedName))
    self.nbWrittenSequences += 1
    for mapping in self.sortMappings(selectedMappings):
        self.nbWrittenMappings += 1
        self.gff3Writer.addTranscript(mapping.getTranscript())
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
357
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
358
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def readMappings(self):
    """
    Stream the mapping file, group consecutive mappings by read and process
    each group.

    The read name is the text of the query name before its first space.
    Mappings of a read reported by self.checkPreviouslyMapped are skipped
    (and logged when a log is open).  Consecutive mappings with the same
    read name are accumulated and passed to self.processMappings when the
    name changes, and once more after the last mapping.  The GFF3 writer is
    then flushed and closed.
    """
    previousQueryName = None
    mappings = []
    self.parser.reset()
    progress = Progress(self.nbMappings, "Reading mappings", self.verbosity)
    for mapping in self.parser.getIterator():
        queryName = mapping.queryInterval.getName().split(" ")[0]
        if self.checkPreviouslyMapped(queryName):
            if self.logHandle is not None:
                self.logHandle.write("Mapping %s has already been mapped.\n" % (queryName))
        elif previousQueryName == queryName:
            mappings.append(mapping)
        else:
            # A new read starts: flush the group collected so far.
            if previousQueryName is not None:
                self.processMappings(mappings)
            previousQueryName = queryName
            mappings = [mapping]
        progress.inc()
    # The last group is still pending when the iterator is exhausted.
    self.processMappings(mappings)
    self.gff3Writer.write()
    self.gff3Writer.close()
    progress.done()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
382
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
383
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def writeUnmatched(self):
    """
    Write every input sequence that is absent from the mapped-names table.

    Each sequence of self.sequenceListParser is looked up by the text of its
    name before the first space; when the lookup returns no row, the
    sequence is passed to self.unmatchedWriter.
    """
    progress = Progress(self.nbSequences, "Reading unmatched sequences", self.verbosity)
    for sequence in self.sequenceListParser.getIterator():
        name = sequence.getName().split(" ")[0]
        # NOTE(review): the query is built by string interpolation; a name
        # containing a quote would break it.  Also, processMappings stores
        # the untruncated query name (plus "/1" when self.suffix is set),
        # whereas this lookup uses the truncated name -- verify both sides
        # agree on the key.
        result = self.mySqlConnection.executeQuery("SELECT * FROM %s WHERE name = '%s' LIMIT 1" % (self.mappedNamesTable.name, name))
        if result.isEmpty():
            self.unmatchedWriter.addSequence(sequence)
        progress.inc()
    progress.done()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
393
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
394
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def analyze(self):
    """
    Run the whole analysis, in order: count the mappings, check their order,
    store the sequences, store the already mapped reads (only when an
    already-mapped reader is set), read and filter the mappings, and finally
    write the unmatched sequences (only when an unmatched writer is set).
    """
    self.countMappings()
    self.checkOrder()
    self.storeSequences()
    if self.alreadyMappedReader is not None:
        self.storeAlreadyMapped()
    self.readMappings()
    if self.unmatchedWriter is not None:
        self.writeUnmatched()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
404
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
405
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
406
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
407
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
if __name__ == "__main__":

    # parse command line
    description = "Mapper Analyzer v1.0.1: Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. [Category: Conversion]"

    parser = OptionParser(description = description)
    compGroup = OptionGroup(parser, "Compulsory options")
    filtGroup = OptionGroup(parser, "Filtering options")
    tranGroup = OptionGroup(parser, "Transformation options")
    outpGroup = OptionGroup(parser, "Output options")
    otheGroup = OptionGroup(parser, "Other options")
    compGroup.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file (output of the tool) [compulsory] [format: file in mapping format given by -f]")
    compGroup.add_option("-f", "--format", dest="format", action="store", default="seqmap", type="string", help="format of the file [compulsory] [format: mapping file format]")
    compGroup.add_option("-q", "--sequences", dest="sequencesFileName", action="store", type="string", help="file of the sequences [compulsory] [format: file in sequence format given by -k]")
    compGroup.add_option("-k", "--seqFormat", dest="sequenceFormat", action="store", default="fasta", type="string", help="format of the sequences: fasta or fastq [default: fasta] [format: sequence file format]")
    compGroup.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    filtGroup.add_option("-n", "--number", dest="number", action="store", default=None, type="int", help="max. number of occurrences of a sequence [format: int]")
    filtGroup.add_option("-s", "--size", dest="size", action="store", default=None, type="int", help="minimum pourcentage of size [format: int]")
    filtGroup.add_option("-d", "--identity", dest="identity", action="store", default=None, type="int", help="minimum pourcentage of identity [format: int]")
    filtGroup.add_option("-m", "--mismatch", dest="mismatch", action="store", default=None, type="int", help="maximum number of mismatches [format: int]")
    filtGroup.add_option("-p", "--gap", dest="gap", action="store", default=None, type="int", help="maximum number of gaps [format: int]")
    tranGroup.add_option("-e", "--mergeExons", dest="mergeExons", action="store_true", default=False, help="merge exons when introns are short [format: bool] [default: false]")
    tranGroup.add_option("-x", "--removeExons", dest="removeExons", action="store_true", default=False, help="remove transcripts when exons are short [format: bool] [default: false]")
    outpGroup.add_option("-t", "--title", dest="title", action="store", default="SMART", type="string", help="title of the UCSC track [format: string] [default: SMART]")
    outpGroup.add_option("-r", "--remaining", dest="remaining", action="store_true", default=False, help="print the unmatched sequences [format: bool] [default: false]")
    otheGroup.add_option("-a", "--append", dest="appendFileName", action="store", default=None, type="string", help="append to GFF3 file [format: file in GFF3 format]")
    otheGroup.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")
    otheGroup.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")
    parser.add_option_group(compGroup)
    parser.add_option_group(filtGroup)
    parser.add_option_group(tranGroup)
    parser.add_option_group(outpGroup)
    parser.add_option_group(otheGroup)
    (options, args) = parser.parse_args()

    # configure the analyzer from the command line; optional settings are
    # only forwarded when the user supplied them
    analyzer = MapperAnalyzer(options.verbosity)
    analyzer.setMappingFile(options.inputFileName, options.format)
    analyzer.setSequenceFile(options.sequencesFileName, options.sequenceFormat)
    analyzer.setOutputFile(options.outputFileName, options.title)
    if options.appendFileName is not None:
        analyzer.setAlreadyMatched(options.appendFileName)
    if options.remaining:
        analyzer.setRemainingFile(options.outputFileName, options.sequenceFormat)
    if options.number is not None:
        analyzer.setMaxMappings(options.number)
    if options.size is not None:
        analyzer.setMinSize(options.size)
    if options.identity is not None:
        analyzer.setMinId(options.identity)
    if options.mismatch is not None:
        analyzer.setMaxMismatches(options.mismatch)
    if options.gap is not None:
        analyzer.setMaxGaps(options.gap)
    if options.mergeExons:
        analyzer.mergeExons(True)
    if options.removeExons:
        analyzer.acceptShortExons(False)
    if options.log:
        analyzer.setLog("%s.log" % (options.outputFileName))
    analyzer.analyze()

    def percentage(count, total):
        # An empty sequence or mapping file gives a zero total; report 0%
        # instead of failing with ZeroDivisionError after the analysis ran.
        if not total:
            return 0.0
        return float(count) / total * 100

    # print statistics (single-argument print(...) is valid in Python 2 and 3)
    if options.verbosity > 0:
        appended = options.appendFileName is not None
        nbSequences = analyzer.nbSequences
        nbMappings = analyzer.nbMappings
        print("kept %i sequences over %s (%f%%)" % (analyzer.nbWrittenSequences, nbSequences, percentage(analyzer.nbWrittenSequences, nbSequences)))
        if appended:
            print("kept %i sequences over %s (%f%%) including already mapped sequences" % (analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, nbSequences, percentage(analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, nbSequences)))
        print("kept %i mappings over %i (%f%%)" % (analyzer.nbWrittenMappings, nbMappings, percentage(analyzer.nbWrittenMappings, nbMappings)))
        if appended:
            print("kept %i mappings over %i (%f%%) including already mapped" % (analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, nbMappings, percentage(analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, nbMappings)))
        print("removed %i too short mappings (%f%%)" % (analyzer.tooShort, percentage(analyzer.tooShort, nbMappings)))
        print("removed %i mappings with too many mismatches (%f%%)" % (analyzer.tooManyMismatches, percentage(analyzer.tooManyMismatches, nbMappings)))
        print("removed %i mappings with too many gaps (%f%%)" % (analyzer.tooManyGaps, percentage(analyzer.tooManyGaps, nbMappings)))
        print("removed %i mappings with too short exons (%f%%)" % (analyzer.tooShortExons, percentage(analyzer.tooShortExons, nbMappings)))
        print("removed %i sequences with too many hits (%f%%)" % (analyzer.tooManyMappings, percentage(analyzer.tooManyMappings, nbSequences)))
        print("%i sequences have no mapping (%f%%)" % (nbSequences - analyzer.nbWrittenSequences, percentage(nbSequences - analyzer.nbWrittenSequences, nbSequences)))
        if appended:
            print("%i sequences have no mapping (%f%%) excluding already mapped sequences" % (nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, percentage(nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, nbSequences)))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
485
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
486