annotate SMART/Java/Python/structure/TranscriptListsComparator.py @ 13:03045debed6e

Uploaded
author m-zytnicki
date Wed, 17 Apr 2013 10:39:35 -0400
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # Copyright INRA-URGI 2009-2010
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 import random
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 from SMART.Java.Python.misc import Utils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 from SMART.Java.Python.structure.Transcript import Transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 from SMART.Java.Python.structure.TranscriptList import TranscriptList
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from SMART.Java.Python.misc.Progress import Progress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
42
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
43 class TranscriptListsComparator(object):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
44 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
45 Compare two transcript lists, using a database for one of the lists
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
46 Uses one TranscriptContainer for query data,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
47 one TranscriptContainer exported to MySqlTranscriptTable for reference data,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
48 one MySqlTranscriptTable for transformed reference data
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
49 @ivar inputTranscriptContainers: parsers to the list of query transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
50 @type inputTranscriptContainers: list of 2 L{TranscriptContainer<TranscriptContainer>}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
51 @ivar writer: transcript list writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
52 @type writer: class L{TranscriptListWriter<TranscriptListWriter>}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
53 @ivar mySqlConnection: connection to a MySQL database (to compute the overlaps efficiently)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
54 @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
55 @ivar introns: compare transcripts or exons only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
56 @type introns: list of 2 boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
57 @ivar starts: restrict the query transcripts to first nucleotides
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
58 @type starts: list of 2 int or None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
59 @ivar fivePrimes: extend a list of transcripts by their 5' end
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
60 @type fivePrimes: list of 2 int or None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
61 @ivar threePrimes: extend a list of transcripts by their 3' end
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
62 @type threePrimes: list of 2 int or None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
63 @ivar minDistance: min distance between two transcripts [default: None]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
64 @type minDistance: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
65 @ivar maxDistance: max distance between two transcripts [default: 0]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66 @type maxDistance: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
67 @ivar minOverlap: minimum number of overlapping nucleotides to declare an overlap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
68 @type minOverlap: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
69 @ivar pcOverlap: percentage of overlapping nucleotides to declare an overlap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
70 @type pcOverlap: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
71 @ivar upstreams: consider distances with elements which are upstream of the transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
72 @type upstreams: list of 2 boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
73 @ivar downstreams: consider distances with elements which are downstream of the transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
74 @type downstreams: list of 2 boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
75 @ivar colinear: whether transcripts should overlap in the same direction
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
76 @type colinear: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
77 @ivar antisense: whether transcripts should overlap in the opposite direction
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
78 @type antisense: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
79 @ivar outputDistance: output distance between query and reference instead of query transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
80 @type outputDistance: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
81 @ivar absolute: do not consider the strand while computing distance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
82 @type absolute: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
83 @ivar strandedDistance: return a line per strand while computing distances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
84 @type strandedDistance: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
85 @ivar QUERY: constant specifying the query objects
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
86 @type QUERY: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
87 @ivar REFERENCE: constant specifying the reference objects
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
88 @type REFERENCE: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
89 @ivar INPUTTYPES: set of input types of data (query or reference) objects
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
90 @type INPUTTYPES: list of 2 int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
91 @ivar typeToString: string representation of the previous types
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
92 @type typeToString: dict
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
93 @ivar tableNames: name of the transcript tables
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
94 @type tableNames: dict of strings
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
95 @ivar nbTranscripts: number of transcripts in the query/reference set
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
96 @type nbTranscripts: list of 2 int or None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
97 @ivar nbNucleotides: number of nucleotides in the query/reference set
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
98 @type nbNucleotides: list of 2 int or None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
99 @ivar transcriptsToBeStored: transcripts that will be stored into database
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
100 @type transcriptsToBeStored: dict of class L{TranscriptList<TranscriptList>}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
101 @ivar multiple: in merge mode, aggregate multiple transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
102 @type multiple: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
103 @ivar normalization: normalize each element by the number of mappings of this element
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
104 @type normalization: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
105 @ivar invert: invert the current comparison
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
106 @type invert: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
107 @ivar splitDifference: split into intervals when computing difference
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
108 @type splitDifference: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
109 @ivar odds: whether odds about the comparison should be computed
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
110 @type odds: boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
111 @ivar overlapResults: count the number of overlaps
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
112 @type overlapResults: dictionary
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
113 @ivar oddResults: compute the number of times each transcript overlaps (or is merged with) another one
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
114 @type oddResults: dictionary
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
115 @ivar outputContainer: container of the output transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
116 @type outputContainer: class L{TranscriptContainer<TranscriptContainer>}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
117 @ivar logHandle: log handle
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
118 @type logHandle: file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119 @ivar verbosity: verbosity
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
120 @type verbosity: int
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
121 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
122
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def __init__(self, logHandle = None, verbosity = 0):
    """
    Constructor
    Set every comparison option to its default value, open the MySQL
    connection, prepare one writer per temporary table and create the database
    @param logHandle: log handle
    @type  logHandle: file
    @param verbosity: verbosity
    @type  verbosity: int
    """
    # constants identifying the data sets
    self.QUERY, self.REFERENCE, self.WORKING = 0, 1, 2
    self.INPUTTYPES = (self.QUERY, self.REFERENCE)
    self.INPUTWORKINGTYPES = self.INPUTTYPES + (self.WORKING,)
    self.typeToString = {
        self.QUERY: "Query",
        self.REFERENCE: "Reference",
        self.WORKING: "Working",
    }

    # logging, database connection and input parsers
    self.logHandle = logHandle
    self.verbosity = verbosity
    self.mySqlConnection = MySqlConnection(verbosity - 1)
    self.inputTranscriptContainers = [None, None]

    # temporary table names, each suffixed with a random number
    # NOTE(review): WORKING (2) indexes the "tmpOutputTable" entry, while
    # "tmpWorkingTable" sits at index 3 -- confirm this is intended
    tablePrefixes = ("tmpQueryTable", "tmpReferenceTable", "tmpOutputTable", "tmpWorkingTable")
    self.tableNames = ["%s_%d" % (prefix, random.randint(0, 100000)) for prefix in tablePrefixes]
    self.mySqlTranscriptWriters = []
    for tableName in self.tableNames:
        self.mySqlTranscriptWriters.append(MySqlTranscriptWriter(self.mySqlConnection, tableName, verbosity - 1))
    self.writer = None

    # per data set options (index 0: query, index 1: reference)
    self.introns = [False, False]
    self.starts = [None, None]
    self.ends = [None, None]
    self.fivePrimes = [None, None]
    self.threePrimes = [None, None]

    # distance and overlap thresholds
    self.minDistance = None
    self.maxDistance = 0
    self.minOverlap = 1
    self.pcOverlap = None

    # orientation and relative position options
    self.colinear = False
    self.antisense = False
    self.downstreams = [False, False]
    self.upstreams = [False, False]

    # distance output options
    self.outputDistance = False
    self.absolute = False
    self.strandedDistance = False

    # statistics on the input sets
    self.nbTranscripts = [None, None]
    self.nbNucleotides = [None, None]

    # comparison modes
    self.normalization = False
    self.included = False
    self.including = False
    self.invert = False
    self.notOverlapping = False
    self.splitDifference = False
    self.multiple = False
    self.odds = False

    # results and output
    self.overlapResults = None
    self.oddResults = None
    self.outputContainer = None
    self.transcriptsToBeStored = {}
    for inputType in self.INPUTWORKINGTYPES:
        self.transcriptsToBeStored[inputType] = TranscriptList()
    self.nbPrinted = 0

    self.mySqlConnection.createDatabase()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
180
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
181
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def __del__(self):
    """
    Destructor
    Remove all temporary tables, then delete the database.
    Does nothing when the instance was only partially constructed (for
    instance when the constructor failed before the writers or the
    connection were set), so that the destructor does not raise an
    AttributeError on top of the original error.
    """
    inputTypes = getattr(self, "INPUTWORKINGTYPES", None)
    writers = getattr(self, "mySqlTranscriptWriters", None)
    connection = getattr(self, "mySqlConnection", None)
    if inputTypes is None or writers is None or connection is None:
        return
    # drop the tables of each query/reference/working writer first...
    for inputType in inputTypes:
        writers[inputType].removeTables()
    # ...then the database itself
    connection.deleteDatabase()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
190
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def acceptIntrons(self, type, bool):
    """
    Choose whether introns are taken into account for one data set
    @param type: index of the data set (query or reference)
    @type  type: int
    @param bool: value stored in the intron flag of this data set
    @type  bool: boolean
    """
    intronFlags = self.introns
    intronFlags[type] = bool
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
200
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
201
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def restrictToStart(self, type, size):
    """
    Record the start restriction size of one data set
    Also resets the intron flag of this data set to False
    @param type: index of the data set (query or reference)
    @type  type: int
    @param size: the size of the transcript to be considered
    @type  size: int
    """
    startSizes = self.starts
    startSizes[type] = size
    intronFlags = self.introns
    intronFlags[type] = False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
212
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
213
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def restrictToEnd(self, type, size):
    """
    Record the end restriction size of one data set
    Also resets the intron flag of this data set to False
    @param type: index of the data set (query or reference)
    @type  type: int
    @param size: the size of the transcript to be considered
    @type  size: int
    """
    endSizes = self.ends
    endSizes[type] = size
    intronFlags = self.introns
    intronFlags[type] = False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
224
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
225
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def extendFivePrime(self, type, size):
    """
    Record the size of the 5' extension of one data set
    @param type: index of the data set (query or reference)
    @type  type: int
    @param size: size of the extension
    @type  size: int
    """
    extensions = self.fivePrimes
    extensions[type] = size
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
235
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
236
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def extendThreePrime(self, type, size):
    """
    Record the size of the 3' extension of one data set
    @param type: index of the data set (query or reference)
    @type  type: int
    @param size: size of the extension
    @type  size: int
    """
    extensions = self.threePrimes
    extensions[type] = size
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
246
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
247
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMinDistance(self, distance):
    """
    Store the minimum distance allowed between two transcripts
    @param distance: the minimum distance
    @type  distance: int
    """
    setattr(self, "minDistance", distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
255
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
256
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMaxDistance(self, distance):
    """
    Store the maximum distance allowed between two transcripts
    @param distance: the maximum distance
    @type  distance: int
    """
    setattr(self, "maxDistance", distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
264
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
265
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setMinOverlap(self, overlap):
    """
    Store the minimum number of nucleotides needed to declare an overlap
    @param overlap: the minimum number of nucleotides
    @type  overlap: int
    """
    setattr(self, "minOverlap", overlap)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
273
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
274
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setPcOverlap(self, overlap):
    """
    Set the percentage of nucleotides to declare an overlap.
    The value is only stored (in self.pcOverlap); nothing is returned.
    @param overlap: percentage of nucleotides
    @type  overlap: int
    """
    self.pcOverlap = overlap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
282
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
283
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setUpstream(self, type, boolean):
    """
    Consider transcripts which are upstream of some transcripts.
    The flag is recorded in self.upstreams under the given type.
    @param type:    whether use query/reference data
    @type  type:    int
    @param boolean: consider only these transcripts or not
    @type  boolean: boolean
    """
    self.upstreams[type] = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
293
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
294
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setDownstream(self, type, boolean):
    """
    Consider transcripts which are downstream of some transcripts.
    The flag is recorded in self.downstreams under the given type.
    @param type:    whether use query/reference data
    @type  type:    int
    @param boolean: consider only these transcripts or not
    @type  boolean: boolean
    """
    self.downstreams[type] = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
304
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
305
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setOutputDistance(self, boolean):
    """
    Output distance between query and reference instead of query transcript.
    The flag is only stored (in self.outputDistance); nothing is returned.
    @param boolean: whether distance should be output
    @type  boolean: boolean
    """
    self.outputDistance = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
313
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
314
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setAbsolute(self, boolean):
    """
    Do not consider strand when computing distance (thus, having only
    non-negative values).
    The flag is only stored (in self.absolute); nothing is returned.
    @param boolean: flag stored as-is
                    (NOTE(review): presumably True means strands are
                    ignored -- confirm where self.absolute is read)
    @type  boolean: boolean
    """
    self.absolute = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
322
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
323
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setStrandedDistance(self, boolean):
    """
    Return two distance distributions, one per strand.
    The flag is only stored (in self.strandedDistance); nothing is returned.
    @param boolean: whether two distance distributions should be returned
    @type  boolean: boolean
    """
    self.strandedDistance = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
331
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
332
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getColinearOnly(self, boolean):
    """
    Only consider transcripts that overlap in the same direction.
    Despite the "get" prefix this is a setter: it stores the flag in
    self.colinear and returns nothing.
    @param boolean: whether transcripts should overlap in the same direction
    @type  boolean: boolean
    """
    self.colinear = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
340
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
341
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getAntisenseOnly(self, boolean):
    """
    Only consider transcripts that overlap in the opposite direction.
    Despite the "get" prefix this is a setter: it stores the flag in
    self.antisense and returns nothing.
    @param boolean: whether transcripts should overlap in the opposite direction
    @type  boolean: boolean
    """
    self.antisense = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
349
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
350
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setIncludedOnly(self, boolean):
    """
    Keep the elements from first set which are included in the second set.
    The flag is only stored (in self.included); nothing is returned.
    @param boolean: whether to keep included elements only
    @type  boolean: boolean
    """
    self.included = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
358
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
359
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setIncludingOnly(self, boolean):
    """
    Keep the elements from second set which are included in the first set.
    The flag is only stored (in self.including); nothing is returned.
    @param boolean: whether to keep included elements only
    @type  boolean: boolean
    """
    self.including = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
367
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
368
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setNormalization(self, boolean):
    """
    Normalize the elements by the number of mappings in the genome.
    The flag is only stored (in self.normalization); nothing is returned.
    @param boolean: whether to normalize
    @type  boolean: boolean
    """
    self.normalization = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
376
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
377
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getInvert(self, boolean):
    """
    Only consider transcripts that do not overlap.
    Despite the "get" prefix this is a setter: it stores the flag in
    self.invert and returns nothing.
    @param boolean: whether to invert the selection
    @type  boolean: boolean
    """
    self.invert = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
385
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
386
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def includeNotOverlapping(self, boolean):
    """
    Also output the elements which do not overlap.
    The flag is only stored (in self.notOverlapping); nothing is returned.
    @param boolean: whether to output the elements which do not overlap
    @type  boolean: boolean
    """
    self.notOverlapping = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
394
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
395
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setSplitDifference(self, boolean):
    """
    Split into intervals when computing difference.
    The flag is only stored (in self.splitDifference); nothing is returned.
    @param boolean: whether to split
    @type  boolean: boolean
    """
    self.splitDifference = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
403
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
404
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def aggregate(self, boolean):
    """
    In merge mode, aggregate multiple transcripts.
    The flag is only stored (in self.multiple); nothing is returned.
    @param boolean: aggregate multiple transcripts
    @type  boolean: boolean
    """
    self.multiple = boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
412
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
413
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getTables(self, type):
    """
    Get the SQL tables.
    @param type: type of the table (query, reference, etc.)
    @type  type: int
    @return:     the result of getTables() on the writer registered for
                 this type; the other methods of this class iterate over it
                 and index it by chromosome
    """
    return self.mySqlTranscriptWriters[type].getTables()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
421
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
422
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def computeOdds(self, boolean):
    """
    Compute odds.
    Stores the flag in self.odds; when the flag is true, self.overlapResults
    is (re)initialised to an empty dict.
    @param boolean: whether odds should be computed
    @type  boolean: boolean
    """
    self.odds = boolean
    if self.odds:
        # Start from an empty result store each time odds are switched on.
        self.overlapResults = dict()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
432
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
433
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def computeOddsPerTranscript(self, boolean):
    """
    Compute odds for each transcript.
    Stores the flag in self.odds; when the flag is true, self.overlapResults
    is (re)initialised to an empty dict.
    @param boolean: whether odds for each transcript should be computed
    @type  boolean: boolean
    """
    # NOTE(review): this writes the same self.odds flag as computeOdds();
    # no dedicated per-transcript attribute is set here -- confirm that this
    # is intended and not a copy/paste slip.
    self.odds = boolean
    if self.odds:
        # Start from an empty result store each time odds are switched on.
        self.overlapResults = dict()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
443
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
444
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def removeTables(self):
    """
    Remove the temporary MySQL tables.
    Calls remove() on every per-chromosome table of every type listed in
    self.INPUTWORKINGTYPES.
    """
    for tableType in self.INPUTWORKINGTYPES:
        # Fetch the tables once per type instead of once per chromosome.
        tables = self.getTables(tableType)
        for chromosome in tables:
            tables[chromosome].remove()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
452
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
453
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def clearTables(self):
    """
    Empty the content of the databases.
    For every type listed in self.INPUTWORKINGTYPES that has a transcript
    list parser set, calls clear() on each of its per-chromosome tables.
    """
    for tableType in self.INPUTWORKINGTYPES:
        # Types without a parser are left untouched.
        if self.transcriptListParsers[tableType] is not None:
            tables = self.getTables(tableType)
            for chromosome in tables:
                tables[chromosome].clear()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
462
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
463
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def extendTranscript(self, type, transcript):
    """
    Extend a transcript corresponding to the parameters of the class.
    Works on a fresh Transcript filled through copy(transcript), then applies,
    in this order and only when the value configured for this type is not
    None: restrictStart, restrictEnd, extendStart (5' value) and extendEnd
    (3' value).
    @param type:       whether use query/reference parameters
    @type  type:       int
    @param transcript: a transcript
    @type  transcript: class L{Transcript<Transcript>}
    @return:           the possibly extended transcript
    """
    extendedTranscript = Transcript()
    extendedTranscript.copy(transcript)
    if self.starts[type] is not None:
        extendedTranscript.restrictStart(self.starts[type])
    if self.ends[type] is not None:
        extendedTranscript.restrictEnd(self.ends[type])
    if self.fivePrimes[type] is not None:
        extendedTranscript.extendStart(self.fivePrimes[type])
    if self.threePrimes[type] is not None:
        extendedTranscript.extendEnd(self.threePrimes[type])
    return extendedTranscript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
482
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
483
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def storeTranscript(self, type, transcript, now=True):
    """
    Add a transcript to a MySQL database, or postpone the store.
    A reference transcript is also added to the working writer.
    @param type:       whether use query/reference table
    @type  type:       int
    @param transcript: a transcript
    @type  transcript: class L{Transcript<Transcript>}
    @param now:        whether transcript should be stored now (or the store
                       can be postponed)
    @type  now:        bool
    """
    self.mySqlTranscriptWriters[type].addTranscript(transcript)
    if type == self.REFERENCE:
        # The working set mirrors the reference set.
        self.mySqlTranscriptWriters[self.WORKING].addTranscript(transcript)
    if now:
        self.mySqlTranscriptWriters[type].write()
        if type == self.REFERENCE:
            self.mySqlTranscriptWriters[self.WORKING].write()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
501
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
502
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def writeTranscript(self, transcript):
    """
    Write a transcript in the output file.
    Does nothing when no writer is set.
    @param transcript: a transcript
    @type  transcript: class L{Transcript<Transcript>}
    """
    if self.writer is not None:
        self.writer.addTranscript(transcript)
        # NOTE(review): the counter is assumed to track transcripts actually
        # handed to the writer, hence inside the guard -- confirm.
        self.nbPrinted += 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
512
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
513
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def flushData(self, type=None):
    """
    Store the remaining transcripts.
    Calls write() on the MySQL writer of each selected type, then on the
    output writer when one is set.
    @param type: whether use query/reference table (None for all the types
                 listed in self.INPUTWORKINGTYPES)
    @type  type: int or None
    """
    # Identity test: 0 is a legitimate type value and must not mean "all".
    if type is None:
        types = self.INPUTWORKINGTYPES
    else:
        types = [type]
    for currentType in types:
        self.mySqlTranscriptWriters[currentType].write()
    if self.writer is not None:
        self.writer.write()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
528
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
529
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def unstoreTranscript(self, type, transcript):
    """
    Remove a transcript from a MySQL database.
    A reference transcript is also removed from the working table of the
    same chromosome.
    @param type:       whether use query/reference table
    @type  type:       int
    @param transcript: a transcript
    @type  transcript: class L{Transcript<Transcript>}
    """
    chromosome = transcript.getChromosome()
    self.getTables(type)[chromosome].removeTranscript(transcript)
    if type == self.REFERENCE:
        self.getTables(self.WORKING)[chromosome].removeTranscript(transcript)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
541
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
542
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def addIndexes(self, tables):
    """
    Add useful indexes to the tables.
    For each per-chromosome table of each given type, creates an index on
    "start" on the transcript table and an index on "transcriptId" on its
    exons table. Index names embed the chromosome, the type and a random
    number in [0, 100000].
    @param tables: which tables should be indexed
    @type  tables: list of int
    """
    for tableType in tables:
        # Fetch the tables once per type instead of twice per chromosome.
        typeTables = self.getTables(tableType)
        for chromosome in typeTables:
            table = typeTables[chromosome]
            table.createIndex("iStart_transcript_%s_%d_%d" % (chromosome, tableType, random.randint(0, 100000)), ["start"])
            table.exonsTable.createIndex("iTranscriptId_exon_%s_%d_%d" % (chromosome, tableType, random.randint(0, 100000)), ["transcriptId"])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
553
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
554
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def storeTranscriptList(self, type, transcriptListParser, extension):
    """
    Store a transcript list into database.
    Every transcript of the parser is (optionally extended, then) added to
    the writer of the given type, which is written once at the end. For the
    reference type, a working table is then created for each reference
    chromosome and filled through copy() from the reference table.
    @param type:                 whether use query/reference parser
    @type  type:                 int
    @param transcriptListParser: a parser of transcript list
    @type  transcriptListParser: class L{TranscriptContainer<TranscriptContainer>}
    @param extension:            extend (or not) the transcripts
    @type  extension:            boolean
    """
    writer = self.mySqlTranscriptWriters[type]
    progress = Progress(transcriptListParser.getNbTranscripts(), "Writing transcripts for %s" % ("query" if type == self.QUERY else "reference"), self.verbosity-1)
    for transcript in transcriptListParser.getIterator():
        if extension:
            transcript = self.extendTranscript(type, transcript)
        writer.addTranscript(transcript)
        progress.inc()
    writer.write()
    progress.done()
    if type == self.REFERENCE:
        referenceTables = self.getTables(self.REFERENCE)
        workingTables = self.getTables(self.WORKING)
        for chromosome in referenceTables:
            workingTables[chromosome] = MySqlTranscriptTable(self.mySqlConnection, self.tableNames[self.WORKING], chromosome, self.verbosity-1)
            workingTables[chromosome].copy(referenceTables[chromosome])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
577
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
578
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setInputTranscriptContainer(self, type, inputTranscriptContainer):
    """
    Register an input transcript container and cache its sizes
    @param type:                     slot under which the container is stored (query or reference)
    @type  type:                     int
    @param inputTranscriptContainer: the container to register
    @type  inputTranscriptContainer: class L{TranscriptContainer<TranscriptContainer>}
    """
    container = inputTranscriptContainer
    self.inputTranscriptContainers[type] = container
    # the counts are read once here and kept next to the container
    self.nbTranscripts[type] = container.getNbTranscripts()
    self.nbNucleotides[type] = container.getNbNucleotides()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
590
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
591
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def setOutputWriter(self, writer):
    """
    Register the writer that receives the output transcripts
    @param writer: the writer to use
    @type  writer: class L{TranscriptListWriter<TranscriptListWriter>}
    """
    self.writer = writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
599
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
600
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def compareTranscript(self, transcript1, transcript2, includeDistance = False):
    """
    Compare two transcripts, using user defined parameters
    @param transcript1:     a transcript from the query set (already extended)
    @type  transcript1:     class L{Transcript<Transcript>}
    @param transcript2:     a transcript from the reference set (already extended)
    @type  transcript2:     class L{Transcript<Transcript>}
    @param includeDistance: if set, a copy of the query is widened by C{self.maxDistance} at both ends before comparing
    @type  includeDistance: boolean
    @return:                True if the pair passes every configured filter, False otherwise
    """
    # work on a copy, so that the caller's transcript is never modified
    extendedTranscript1 = Transcript()
    extendedTranscript1.copy(transcript1)
    if includeDistance and self.maxDistance > 0:
        extendedTranscript1.extendStart(self.maxDistance)
        extendedTranscript1.extendEnd(self.maxDistance)

    # the exon check uses the larger of the absolute and the percentage thresholds
    # (the percentage is taken on the size of the non-widened query)
    minOverlap = self.minOverlap
    if self.pcOverlap is not None:
        minOverlap = max(minOverlap, transcript1.getSize() / 100.0 * self.pcOverlap)

    # NOTE(review): this coarse check uses self.minOverlap, not the percentage-adjusted
    # threshold computed above -- kept as is, confirm that this is intended
    if not extendedTranscript1.overlapWith(transcript2, self.minOverlap):
        return False

    # constraints on the relative positions of the starts and ends
    if (self.downstreams[self.QUERY] and transcript2.getStart() > extendedTranscript1.getStart()) or \
       (self.upstreams[self.QUERY] and transcript2.getEnd() < extendedTranscript1.getEnd()) or \
       (self.downstreams[self.REFERENCE] and extendedTranscript1.getStart() > transcript2.getStart()) or \
       (self.upstreams[self.REFERENCE] and extendedTranscript1.getEnd() < transcript2.getEnd()):
        return False

    # constraints on the relative strands
    sameDirection = extendedTranscript1.getDirection() == transcript2.getDirection()
    if (self.antisense and sameDirection) or (self.colinear and not sameDirection):
        return False

    # constraints on inclusion
    if self.included and not transcript2.include(extendedTranscript1):
        return False
    if self.including and not extendedTranscript1.include(transcript2):
        return False

    # when introns are requested for both sets, the checks above are sufficient
    if self.introns[self.REFERENCE] and self.introns[self.QUERY]:
        if self.logHandle is not None:
            self.logHandle.write("%s overlaps with intron of %s\n" % (str(extendedTranscript1), str(transcript2)))
        return True

    # when introns are requested for neither set, the exons themselves must overlap
    if (not self.introns[self.REFERENCE]) and (not self.introns[self.QUERY]) and extendedTranscript1.overlapWithExon(transcript2, minOverlap):
        if self.logHandle is not None:
            self.logHandle.write("%s overlaps with exon of %s\n" % (str(extendedTranscript1), str(transcript2)))
        return True
    return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
644
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
645
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def compareTranscriptToList(self, transcript1):
    """
    Yield the reference transcripts that match a query transcript
    (The query is only widened by the maximum distance, no other extension is applied)
    @param transcript1: a transcript (from the query set)
    @type  transcript1: class L{Transcript<Transcript>}
    @return:            a generator over the matching reference transcripts
    """
    chromosome = transcript1.getChromosome()
    workingTables = self.getTables(self.WORKING)

    # no working table for this chromosome: nothing to yield
    if chromosome not in workingTables:
        return

    # widen a copy of the query, so that the caller's transcript is left untouched
    extendedTranscript1 = Transcript()
    extendedTranscript1.copy(transcript1)
    if self.maxDistance > 0:
        extendedTranscript1.extendStart(self.maxDistance)
        extendedTranscript1.extendEnd(self.maxDistance)

    # one SQL clause per bin range of the widened query
    binClauses = []
    for binRange in extendedTranscript1.getBins():
        if binRange[0] == binRange[1]:
            binClauses.append("bin = %i" % (binRange[0]))
        else:
            binClauses.append("bin BETWEEN %i AND %i" % (binRange[0], binRange[1]))

    # select the candidates which share a bin and whose interval intersects the widened query
    command = "SELECT * FROM %s WHERE (%s) AND start <= %d AND end >= %d" % (workingTables[chromosome].getName(), " OR ".join(binClauses), extendedTranscript1.getEnd(), extendedTranscript1.getStart())

    # the command names the working table, but is run through the reference table object
    for candidate in self.getTables(self.REFERENCE)[chromosome].selectTranscripts(command):
        transcript2 = candidate[1]
        if self.compareTranscript(extendedTranscript1, transcript2):
            yield transcript2
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
680
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
681
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def compareTranscriptList(self):
    """
    Compare the query transcripts to the reference ones, and write the selected query transcripts
    The two sets are stored into tables, then each chromosome of the query is swept in
    consecutive chunks of coordinates; the selected query transcripts are passed to
    C{self.writeTranscript}, tagged with "overlapWith" and "nbOverlaps"
    @return: nothing (the output goes through C{self.writeTranscript})
    """
    distance      = 0
    nbClustersIn  = 0
    nbClustersOut = 0
    if self.maxDistance is not None:
        distance = self.maxDistance

    self.addIndexes([self.QUERY, self.REFERENCE])

    # export the container into tables
    self.storeTranscriptList(self.QUERY, self.inputTranscriptContainers[self.QUERY], True)
    self.storeTranscriptList(self.REFERENCE, self.inputTranscriptContainers[self.REFERENCE], True)

    # looping
    for chromosome1 in sorted(self.getTables(self.QUERY).keys()):
        # get range of transcripts
        command = "SELECT MIN(start), MAX(end), COUNT(id) FROM %s" % (self.getTables(self.QUERY)[chromosome1].getName())
        query   = self.mySqlConnection.executeQuery(command)
        result  = query.getLine()
        first   = result[0]
        last    = result[1]
        nb      = result[2]

        transcripts1 = []
        toBeRemoved1 = []
        transcripts2 = []
        toBeRemoved2 = []
        overlapsWith = []
        nbOverlaps   = []
        nbChunks     = max(1, nb // 100)
        # round up, so that the chunks always reach the last position and are never empty
        chunkSize    = (last - first) // nbChunks + 1
        progress     = Progress(nbChunks + 1, "Analyzing chromosome %s" % (chromosome1), self.verbosity-1)
        for chunk in range(nbChunks + 1):

            # load transcripts
            # the chunk is [start, end[: "end" is exclusive, so that consecutive chunks
            # leave no position uncovered (BETWEEN is inclusive, hence the "end-1" below)
            start   = first + chunk * chunkSize
            end     = start + chunkSize
            command = "SELECT * FROM %s WHERE start BETWEEN %d AND %d" % (self.getTables(self.QUERY)[chromosome1].getName(), start, end-1)
            for index1, transcript1 in self.getTables(self.QUERY)[chromosome1].selectTranscripts(command):
                transcripts1.append(transcript1)
                overlapsWith.append([])
                nbOverlaps.append(0)
                nbClustersIn += 1 if "nbElements" not in transcript1.getTagNames() else transcript1.getTagValue("nbElements")
            # the loaded rows are not needed in the table anymore
            command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.QUERY)[chromosome1].getName(), end)
            self.mySqlConnection.executeQuery(command)
            if chromosome1 in self.getTables(self.REFERENCE):
                command = "SELECT * FROM %s WHERE start BETWEEN %d AND %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), start-distance, end+distance-1)
                # the first chunk also takes every reference which starts before the first query
                if chunk == 0:
                    command = "SELECT * FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)
                for index2, transcript2 in self.getTables(self.REFERENCE)[chromosome1].selectTranscripts(command):
                    transcripts2.append(transcript2)
                command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)
                self.mySqlConnection.executeQuery(command)

            # compare sets
            for index1, transcript1 in enumerate(transcripts1):
                nbElements1 = 1 if "nbElements" not in transcript1.getTagNames() else transcript1.getTagValue("nbElements")
                for transcript2 in transcripts2:
                    if self.compareTranscript(transcript1, transcript2, True):
                        id2 = transcript2.getTagValue("ID") if "ID" in transcript2.getTagNames() else transcript2.getName()
                        # a pair may be seen again in a later chunk: count it only once
                        if id2 not in overlapsWith[index1]:
                            overlapsWith[index1].append(id2)
                            nbOverlaps[index1] += 1 if "nbElements" not in transcript2.getTagNames() else transcript2.getTagValue("nbElements")
                            if self.odds:
                                if transcript2.getName() not in self.overlapResults:
                                    self.overlapResults[transcript2.getName()] = 0
                                self.overlapResults[transcript2.getName()] += nbElements1

                # a query transcript which ends inside the chunk is complete: write and drop it;
                # the others are kept for the next chunk
                if transcript1.getEnd() < end:
                    if Utils.xor(overlapsWith[index1], self.invert) or self.notOverlapping:
                        if overlapsWith[index1]:
                            transcript1.setTagValue("overlapWith", ",".join(overlapsWith[index1])[:100])
                            transcript1.setTagValue("nbOverlaps", "%d" % (nbOverlaps[index1]))
                        elif self.notOverlapping:
                            transcript1.setTagValue("nbOverlaps", "0")
                        self.writeTranscript(transcript1)
                        nbClustersOut += nbElements1
                    toBeRemoved1.append(index1)

            # update list of query transcripts
            # (deleting from the highest index keeps the remaining indices valid)
            for index1 in reversed(toBeRemoved1):
                del transcripts1[index1]
                del overlapsWith[index1]
                del nbOverlaps[index1]
            toBeRemoved1 = []

            # drop the reference transcripts which end too early to be reached by the next chunks
            for index2, transcript2 in enumerate(transcripts2):
                if transcript2.getEnd() + distance < end:
                    toBeRemoved2.append(index2)
            for index2 in reversed(toBeRemoved2):
                del transcripts2[index2]
            toBeRemoved2 = []

            progress.inc()

        # write the query transcripts which are still pending after the last chunk
        for index1, transcript1 in enumerate(transcripts1):
            if Utils.xor(overlapsWith[index1], self.invert) or self.notOverlapping:
                if overlapsWith[index1]:
                    transcript1.setTagValue("overlapWith", ",".join(overlapsWith[index1])[:100])
                    transcript1.setTagValue("nbOverlaps", "%d" % (nbOverlaps[index1]))
                elif self.notOverlapping:
                    transcript1.setTagValue("nbOverlaps", "0")
                self.writeTranscript(transcript1)
                nbClustersOut += 1 if "nbElements" not in transcript1.getTagNames() else transcript1.getTagValue("nbElements")
        progress.done()
        self.getTables(self.QUERY)[chromosome1].remove()
        if chromosome1 in self.getTables(self.REFERENCE):
            self.getTables(self.REFERENCE)[chromosome1].remove()
            self.getTables(self.WORKING)[chromosome1].remove()

    self.flushData()
    if self.writer is not None:
        self.writer.close()
        self.writer = None

    if self.verbosity > 0:
        # single-argument print calls behave the same as statements and as functions
        print("reference: %d elements" % (self.nbTranscripts[self.REFERENCE]))
        print("query: %d elements, %d clustered" % (self.nbTranscripts[self.QUERY], nbClustersIn))
        if self.nbTranscripts[self.QUERY] != 0:
            message = "output: %d elements (%.2f%%)"% (self.nbPrinted, self.nbPrinted / float(self.nbTranscripts[self.QUERY]) * 100)
            if nbClustersOut != 0:
                # both parts go on the same line, separated by a space
                message = "%s %s" % (message, ", %d clustered (%.2f%%)" % (nbClustersOut, float(nbClustersOut) / nbClustersIn * 100))
            print(message)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
811
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
812
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
813 def compareTranscriptListDistance(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
814 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
815 Compare a list of transcript to the reference one
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
816 @return: the distance distributions in a hash
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
817 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
818 nbDistances = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
819 distances = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
820 absDistances = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
821 strandedDistances = dict([(strand, {}) for strand in (1, -1)])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
822
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
823 # export the container into tables
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
824 self.storeTranscriptList(self.QUERY, self.inputTranscriptContainers[self.QUERY], True)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
825 self.storeTranscriptList(self.REFERENCE, self.inputTranscriptContainers[self.REFERENCE], True)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
826
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
827 progress = Progress(self.nbTranscripts[self.QUERY], "Analyzing chromosomes", self.verbosity-1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
828 for transcript1 in self.inputTranscriptContainers[self.QUERY].getIterator():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
829 # get the distance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
830 transcript1 = self.extendTranscript(self.QUERY, transcript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
831 distance = self.maxDistance + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
832 strand = None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
833 closestElement = "None"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
834 for transcript2 in self.compareTranscriptToList(transcript1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
835 thisStrand = transcript1.getDirection() * transcript2.getDirection()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
836 if self.antisense or (not self.colinear and transcript1.getDirection() != transcript2.getDirection()):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
837 transcript2.reverse()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
838 if self.absolute:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
839 transcript2.setDirection(transcript1.getDirection())
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
840 if transcript2.getDirection() == transcript1.getDirection():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
841 if self.starts[self.REFERENCE] != None:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
842 transcript2.restrictStart(self.starts[self.REFERENCE])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
843 if self.ends[self.REFERENCE] != None:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
844 transcript2.restrictEnd(self.ends[self.REFERENCE])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
845 thisDistance = transcript1.getRelativeDistance(transcript2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
846 if (self.absolute):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
847 thisDistance = abs(thisDistance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
848 if abs(thisDistance) < abs(distance):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
849 distance = thisDistance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
850 strand = thisStrand
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
851 closestElement = transcript2.getTagValue("ID") if "ID" in transcript2.getTagNames() else transcript2.getName()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
852 if (distance <= self.maxDistance) and (self.minDistance == None or distance >= self.minDistance):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
853 nbDistances += 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
854 distances[distance] = distances.get(distance, 0) + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
855 absDistance = abs(distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
856 absDistances[absDistance] = absDistances.get(absDistance, 0) + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
857 strandedDistances[strand][distance] = strandedDistances[strand].get(distance, 0)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
858 if distance not in strandedDistances[-strand]:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
859 strandedDistances[-strand][distance] = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
860
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
861 # write transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
862 if distance == self.maxDistance + 1:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
863 distance = "None"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
864 tmpTranscript = Transcript()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
865 tmpTranscript.copy(transcript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
866 tmpTranscript.setTagValue("distance", distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
867 tmpTranscript.setTagValue("closestElement", closestElement)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
868 self.writeTranscript(tmpTranscript)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
869
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
870 progress.inc()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
871 progress.done()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
872
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
873 self.flushData()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
874
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
875 if self.verbosity > 0:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
876 print "reference: %d sequences" % (self.nbTranscripts[self.REFERENCE])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
877 print "query: %d sequences" % (self.nbTranscripts[self.QUERY])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
878 if nbDistances == 0:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
879 print "Nothing matches"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
880 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
881 print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(absDistances)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
882 print "for %d distances (%.2f%%)" % (nbDistances, float(nbDistances) / self.nbTranscripts[self.QUERY] * 100)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
883
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
884 if self.strandedDistance:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
885 return strandedDistances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
886 return distances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
887
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
888
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def compareTranscriptListMerge(self):
    """
    Merge the transcripts of the query list with the overlapping transcripts
    of the reference list, chromosome by chromosome.

    Both lists are first stored into per-chromosome tables.  Each chromosome
    present in both lists is then scanned in chunks of increasing coordinates:
    the rows of the chunk are loaded (and removed from the tables), every
    reference transcript matching a query transcript (see
    C{compareTranscript}) is merged into it, and transcripts that lie entirely
    behind the current chunk are written out.

    When C{self.multiple} is set, query transcripts are also merged with each
    other and every surviving transcript is written; otherwise only the query
    transcripts that absorbed at least one reference transcript are written.

    @return: nothing; merged transcripts are emitted through
             C{writeTranscript} and the writer, if any, is closed
    """
    nbMerges = 0
    inputTypes = (self.QUERY, self.REFERENCE)
    # The sibling methods treat a missing maximum distance as 0; do the same
    # here so the look-ahead arithmetic below never adds None to an int.
    maxDistance = self.maxDistance if self.maxDistance is not None else 0
    # Reference elements are loaded "maxDistance" ahead of the query elements
    # and kept "maxDistance" longer, so that close-by elements can still meet.
    corrections = {self.QUERY: 0, self.REFERENCE: maxDistance}

    for inputType in inputTypes:
        self.storeTranscriptList(inputType, self.inputTranscriptContainers[inputType], True)
    self.flushData()

    # Loop on the chromosomes shared by the two lists
    for chromosome in sorted(self.getTables(self.QUERY).keys()):
        if chromosome not in self.getTables(self.REFERENCE):
            continue

        # Get the size of the chromosome and the number of chunks to use
        maxEnd = 0
        nbChunks = 0
        for inputType in inputTypes:
            table = self.getTables(inputType)[chromosome]
            command = "SELECT MAX(end) from %s" % (table.getName())
            query = self.mySqlConnection.executeQuery(command)
            maxEnd = max(maxEnd, int(query.getLine()[0]))
            nbChunks = max(nbChunks, table.getNbElements())

        # Indices (in the current query list) of the query transcripts that
        # have absorbed a reference transcript; only used when not "multiple".
        mergedTranscripts = {}
        transcripts = {self.QUERY: [], self.REFERENCE: []}
        progress = Progress(nbChunks, "Analyzing %s" % (chromosome), self.verbosity-1)
        for chunk in range(nbChunks):
            rangeEnd = int((chunk + 1) * (float(maxEnd) / nbChunks))

            # Get all transcripts in query and reference from chunk
            for inputType in inputTypes:
                table = self.getTables(inputType)[chromosome]
                command = "SELECT * FROM %s WHERE start <= %d" % (table.getName(), rangeEnd + corrections[inputType])
                for index, transcript in table.selectTranscripts(command):
                    transcripts[inputType].append(transcript)

            # Merge elements between the two samples
            for iQuery, queryTranscript in enumerate(transcripts[self.QUERY]):
                for iReference, referenceTranscript in enumerate(transcripts[self.REFERENCE]):
                    if referenceTranscript is None:
                        continue
                    if not self.compareTranscript(queryTranscript, referenceTranscript, True):
                        continue
                    if queryTranscript.getDirection() != referenceTranscript.getDirection():
                        referenceTranscript.setDirection(queryTranscript.getDirection())
                    queryTranscript.merge(referenceTranscript, self.normalization)
                    nbMerges += 1
                    # A reference transcript is consumed by its first merge
                    transcripts[self.REFERENCE][iReference] = None
                    if not self.multiple:
                        mergedTranscripts[iQuery] = 0

            # Remove from the database exactly the rows that were just loaded:
            # they now live in "transcripts", and leaving some of them behind
            # would load them a second time with the next chunk.
            for inputType in inputTypes:
                table = self.getTables(inputType)[chromosome]
                command = "DELETE FROM %s WHERE start <= %d" % (table.getName(), rangeEnd + corrections[inputType])
                self.mySqlConnection.executeQuery(command)

            # Just in case, self-merge the elements in the query
            if self.multiple:
                queries = transcripts[self.QUERY]
                for iQuery1, queryTranscript1 in enumerate(queries):
                    if queryTranscript1 is None:
                        continue
                    for iQuery2, queryTranscript2 in enumerate(queries):
                        if iQuery2 <= iQuery1 or queryTranscript2 is None:
                            continue
                        # Recomputed for each pair: the size of the first
                        # transcript grows with every merge.
                        minOverlap = self.minOverlap
                        if self.pcOverlap is not None:
                            minOverlap = max(minOverlap, queryTranscript1.getSize() / 100.0 * self.pcOverlap)
                        sameDirection = queryTranscript1.getDirection() == queryTranscript2.getDirection()
                        if queryTranscript2.overlapWith(queryTranscript1, minOverlap) and (sameDirection or not self.colinear):
                            if not sameDirection:
                                queryTranscript2.setDirection(queryTranscript1.getDirection())
                            queryTranscript1.merge(queryTranscript2, self.normalization)
                            queries[iQuery2] = None
                            nbMerges += 1

            # Update the sets of transcripts and write into database (also update mergedTranscripts)
            newTranscripts = {self.QUERY: [], self.REFERENCE: []}
            newMergedTranscripts = {}
            for inputType in inputTypes:
                for index, transcript in enumerate(transcripts[inputType]):
                    if transcript is None:
                        continue
                    isMergedQuery = (inputType == self.QUERY) and (index in mergedTranscripts)
                    if transcript.getEnd() < rangeEnd - corrections[inputType]:
                        # Entirely behind the chunk: nothing can reach it anymore
                        if self.multiple or isMergedQuery:
                            self.writeTranscript(transcript)
                    else:
                        # Still reachable: carry it (and its "merged" mark,
                        # re-indexed for the new list) over to the next chunk
                        if isMergedQuery:
                            newMergedTranscripts[len(newTranscripts[inputType])] = 0
                        newTranscripts[inputType].append(transcript)
            transcripts = newTranscripts
            mergedTranscripts = newMergedTranscripts

            progress.inc()
        progress.done()

        # Write what is left after the last chunk
        for inputType in inputTypes:
            for index, transcript in enumerate(transcripts[inputType]):
                if transcript is None:
                    continue
                if self.multiple or ((inputType == self.QUERY) and (index in mergedTranscripts)):
                    self.writeTranscript(transcript)

    # Manage chromosomes with no corresponding data
    # NOTE(review): the transcripts are read from the OUTPUT tables although
    # the chromosome was found in an input table only -- confirm this is the
    # intended table set.  "1 - inputType" assumes the input types are 0 and 1.
    if self.multiple:
        for inputType in self.INPUTTYPES:
            for chromosome in self.getTables(inputType):
                if chromosome in self.getTables(1 - inputType):
                    continue
                for transcript in self.getTables(self.OUTPUT)[chromosome].getIterator():
                    self.writeTranscript(transcript)

    self.flushData()
    if self.writer is not None:
        self.writer.close()
        self.writer = None

    if self.verbosity > 0:
        nbQueries = self.nbTranscripts[self.QUERY]
        # An empty query set would otherwise divide by zero
        percentPrinted = self.nbPrinted / float(nbQueries) * 100 if nbQueries else 0.0
        print("query: %d sequences" % (nbQueries))
        print("# merges: %d" % (nbMerges))
        print("# printed %d (%.2f%%)" % (self.nbPrinted, percentPrinted))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1009
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1010
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def compareTranscriptListSelfMerge(self):
    """
    Merge the query list of transcripts with itself.

    The query transcripts are stored into per-chromosome tables, then read
    back ordered by start position.  Each incoming transcript absorbs every
    pending transcript it matches (see C{compareTranscript}); pending
    transcripts that end more than the maximum distance before the incoming
    one can no longer be reached and are written out.

    @return: nothing; merged transcripts are emitted through
             C{writeTranscript} and the writer, if any, is closed
    """
    nbMerges = 0
    distance = self.maxDistance if self.maxDistance is not None else 0

    self.addIndexes([self.QUERY])
    self.storeTranscriptList(self.QUERY, self.inputTranscriptContainers[self.QUERY], True)
    self.flushData()

    # looping
    for chromosome1 in sorted(self.getTables(self.QUERY).keys()):
        table = self.getTables(self.QUERY)[chromosome1]
        # Transcripts already read that may still merge with a later one
        transcripts2 = []

        progress = Progress(table.getNbElements(), "Analyzing chromosome %s" % (chromosome1), self.verbosity-1)
        command = "SELECT * FROM %s ORDER BY start" % (table.getName())
        for index1, transcript1 in table.selectTranscripts(command):

            # compare the incoming transcript with the pending ones
            toBeRemoved = set()
            toBePrinted = set()
            for index2, transcript2 in enumerate(transcripts2):
                if self.compareTranscript(transcript1, transcript2, True):
                    if transcript1.getDirection() != transcript2.getDirection():
                        transcript2.setDirection(transcript1.getDirection())
                    transcript1.merge(transcript2, self.normalization)
                    toBeRemoved.add(index2)
                    nbMerges += 1
                elif transcript2.getEnd() + distance < transcript1.getStart():
                    # Input is sorted by start: nothing later can reach it
                    toBePrinted.add(index2)
            transcripts2.append(transcript1)

            for index2 in sorted(toBePrinted):
                self.writeTranscript(transcripts2[index2])
            # Build the set of discarded indices once, not once per element
            discarded = toBeRemoved | toBePrinted
            transcripts2 = [transcript2 for index2, transcript2 in enumerate(transcripts2) if index2 not in discarded]

            # One step per transcript read, matching the size given to Progress
            progress.inc()

        for transcript2 in transcripts2:
            self.writeTranscript(transcript2)
        progress.done()
        table.remove()

    self.flushData()
    if self.writer is not None:
        self.writer.close()
        self.writer = None

    if self.verbosity > 0:
        nbQueries = self.nbTranscripts[self.QUERY]
        # An empty query set would otherwise divide by zero
        percentPrinted = self.nbPrinted / float(nbQueries) * 100 if nbQueries else 0.0
        print("query: %d sequences" % (nbQueries))
        print("# merges: %d" % (nbMerges))
        print("# printed %d (%.2f%%)" % (self.nbPrinted, percentPrinted))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1065
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1066
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1067 def getDifferenceTranscriptList(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1068 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1069 Get the elements of the first list which do not overlap the second list (at the nucleotide level)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1070 @return: the transcripts that overlap with the reference set
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1071 """
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1072 distance = 0 if self.maxDistance == None else self.maxDistance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1073
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1074 self.addIndexes([self.QUERY, self.REFERENCE])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1075
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1076 # export the container into tables
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1077 self.storeTranscriptList(self.QUERY, self.inputTranscriptContainers[self.QUERY], True)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1078 self.storeTranscriptList(self.REFERENCE, self.inputTranscriptContainers[self.REFERENCE], True)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1079
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1080 # looping
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1081 for chromosome1 in sorted(self.getTables(self.QUERY).keys()):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1082 # get range of transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1083 command = "SELECT MIN(start), MAX(end), COUNT(id) FROM %s" % (self.getTables(self.QUERY)[chromosome1].getName())
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1084 query = self.mySqlConnection.executeQuery(command)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1085 result = query.getLine()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1086 first = result[0]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1087 last = result[1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1088 nb = result[2]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1089
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1090 transcripts1 = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1091 transcripts2 = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1092 nbChunks = max(1, nb / 100)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1093 chunkSize = (last - first) / nbChunks
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1094 progress = Progress(nbChunks + 1, "Analyzing chromosome %s" % (chromosome1), self.verbosity-1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1095 for chunk in range(nbChunks + 1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1096
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1097 # load transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1098 start = first + chunk * chunkSize
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1099 end = start + chunkSize - 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1100 command = "SELECT * FROM %s WHERE start BETWEEN %d AND %d" % (self.getTables(self.QUERY)[chromosome1].getName(), start, end-1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1101 for index1, transcript1 in self.getTables(self.QUERY)[chromosome1].selectTranscripts(command):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1102 transcripts1.append(transcript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1103 command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.QUERY)[chromosome1].getName(), end)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1104 self.mySqlConnection.executeQuery(command)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1105 if chromosome1 in self.getTables(self.REFERENCE):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1106 command = "SELECT * FROM %s WHERE start BETWEEN %d AND %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), start-distance, end+distance-1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1107 if chunk == 0:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1108 command = "SELECT * FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1109 for index2, transcript2 in self.getTables(self.REFERENCE)[chromosome1].selectTranscripts(command):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1110 transcripts2.append(transcript2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1111 command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1112 self.mySqlConnection.executeQuery(command)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1113
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1114 # compare sets
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1115 toBeRemoved1 = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1116 for index1, transcript1 in enumerate(transcripts1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1117 newTranscript1 = Transcript()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1118 newTranscript1.copy(transcript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1119 for transcript2 in transcripts2:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1120 newTranscript1 = newTranscript1.getDifference(transcript2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1121 if newTranscript1 == None:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1122 toBeRemoved1.append(index1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1123 break
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1124 transcripts1[index1] = newTranscript1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1125
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1126 # check if query transcript extends bounds of the chunk
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1127 if newTranscript1 != None and newTranscript1.getEnd() < end:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1128 if self.splitDifference:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1129 for exon in newTranscript1.getExons():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1130 transcript = Transcript()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1131 transcript.copy(exon)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1132 self.writeTranscript(transcript)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1133 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1134 self.writeTranscript(newTranscript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1135 toBeRemoved1.append(index1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1136
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1137 # update list of query transcripts
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1138 for index1 in reversed(toBeRemoved1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1139 del transcripts1[index1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1140
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1141 # check if the reference transcripts extends bounds of the chunk
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1142 toBeRemoved2 = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1143 for index2, transcript2 in enumerate(transcripts2):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1144 if transcript2.getEnd() + distance < end:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1145 toBeRemoved2.append(index2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1146 for index2 in reversed(toBeRemoved2):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1147 del transcripts2[index2]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1148
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1149 progress.inc()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1150
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1151 for transcript1 in transcripts1:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1152 if self.splitDifference:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1153 for exon in transcript1.getExons():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1154 transcript = Transcript()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1155 transcript.copy(exon)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1156 self.writeTranscript(transcript)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1157 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1158 self.writeTranscript(transcript1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1159 progress.done()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1160 self.getTables(self.QUERY)[chromosome1].remove()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1161 if chromosome1 in self.getTables(self.REFERENCE):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1162 self.getTables(self.REFERENCE)[chromosome1].remove()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1163 self.getTables(self.WORKING)[chromosome1].remove()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1164
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1165 self.flushData()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1166 if self.writer != None:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1167 self.writer.close()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1168 self.writer = None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1169
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1170 if self.verbosity > 0:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1171 print "query: %d elements" % (self.nbTranscripts[self.QUERY])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1172 print "reference: %d elements" % (self.nbTranscripts[self.REFERENCE])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1173 print "# printed: %d (%.2f%%)" % (self.nbPrinted, self.nbPrinted / float(self.nbTranscripts[self.QUERY]) * 100)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1174
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1175
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getOddsPerTranscript(self):
    """
    Give access to the raw overlap results (self.overlapResults)
    @return a dict of data (the stored object itself, not a copy)
    @raise  Exception if self.odds is not set, i.e. the odds were not computed
    """
    # success path first: results are only exposed when odds were requested
    if self.odds:
        return self.overlapResults
    raise Exception("Did not compute odds!")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1184
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1185
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getOdds(self):
    """
    Return odds about the overlap, i.e. the distribution of the values
    stored in self.overlapResults
    The distribution is computed on the first call, cached in
    self.oddResults, and the cached dict is returned on later calls
    @return a dict of data: each distinct value of self.overlapResults
            mapped to the number of entries holding that value
    @raise  Exception if self.odds is not set, i.e. the odds were not computed
    """
    if not self.odds:
        raise Exception("Did not compute odds!")
    if self.oddResults is not None:
        return self.oddResults
    # Count into a local dict and publish it only once complete: if a value
    # cannot be counted (e.g. it is unhashable), the cache stays unset
    # instead of keeping a partial distribution that later calls would
    # return as if it were final.
    distribution = {}
    # Only the values matter here; values() exists on Python 2 and 3 dicts.
    for value in self.overlapResults.values():
        distribution[value] = distribution.get(value, 0) + 1
    self.oddResults = distribution
    return self.oddResults