comparison smart_toolShed/SMART/Java/Python/structure/TranscriptContainer.py @ 0:e0f8dcca02ed

Uploaded the S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e0f8dcca02ed
1 #
2 # Copyright INRA-URGI 2009-2010
3 #
4 # This software is governed by the CeCILL license under French law and
5 # abiding by the rules of distribution of free software. You can use,
6 # modify and/ or redistribute the software under the terms of the CeCILL
7 # license as circulated by CEA, CNRS and INRIA at the following URL
8 # "http://www.cecill.info".
9 #
10 # As a counterpart to the access to the source code and rights to copy,
11 # modify and redistribute granted by the license, users are provided only
12 # with a limited warranty and the software's author, the holder of the
13 # economic rights, and the successive licensors have only limited
14 # liability.
15 #
16 # In this respect, the user's attention is drawn to the risks associated
17 # with loading, using, modifying and/or developing or reproducing the
18 # software by the user in light of its specific status of free software,
19 # that may mean that it is complicated to manipulate, and that also
20 # therefore means that it is reserved for developers and experienced
21 # professionals having in-depth computer knowledge. Users are therefore
22 # encouraged to load and test the software's suitability as regards their
23 # requirements in conditions enabling the security of their systems and/or
24 # data to be ensured and, more generally, to use and operate it in the
25 # same conditions as regards security.
26 #
27 # The fact that you are presently reading this means that you have had
28 # knowledge of the CeCILL license and that you accept its terms.
29 #
30 import re
31 import sys
32 from commons.core.parsing.ParserChooser import ParserChooser
33 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
34 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
35
class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts and handles different formats.
    The data is loaded lazily: the first accessor that needs it calls L{findData}.
    @ivar container: container of the data (name passed to the parser or, for
                     the "sql" format, prefix of the table names)
    @type container: string
    @ivar format: format of the data
    @type format: string
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser: possibly contains a parser to a list of mappings
    @type mappingListParser: L{MapperParser<MapperParser>} or None
    @ivar transcriptTables: possibly contains the mySQL tables, indexed by chromosome
    @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>} or None
    @ivar mySqlConnection: connection to a MySQL database (None after construction)
    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}
    @ivar foundData: whether L{findData} has already been run
    @type foundData: boolean
    @ivar nbTranscripts: number of transcripts (None until the data is loaded)
    @type nbTranscripts: int or None
    @ivar nbNucleotides: number of nucleotides (None until the data is loaded)
    @type nbNucleotides: int or None
    @ivar chromosomes: chromosomes found in the data (None until the data is loaded)
    @type chromosomes: list or None
    @ivar type: type of the data ("transcript", "mapping" or "sql")
    @type type: string
    @ivar verbosity: verbosity
    @type verbosity: int
    """

    def __init__(self, container, format, verbosity = 0):
        """
        Constructor
        Exits the program (SystemExit) if the container or the format is None.
        @param container: container of the data
        @type container: string
        @param format: format of the data
        @type format: string
        @param verbosity: verbosity
        @type verbosity: int
        """
        self.container = container
        self.format = format
        self.verbosity = verbosity
        self.transcriptListParser = None
        self.mappingListParser = None
        self.transcriptTables = {}
        self.mySqlConnection = None
        self.foundData = False
        self.nbTranscripts = None
        self.nbNucleotides = None
        self.chromosomes = None
        self.type = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")


    def findData(self):
        """
        Load data
        Fills in the type, the parser or the tables, and the statistics
        (number of transcripts, number of nucleotides, chromosomes).
        Exits the program (SystemExit) if the format cannot be handled.
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._loadSqlTables()
        # A parser is only chosen when no type has been set yet
        if self.type is None:
            self._chooseParser()

        # Statistics are only read from the parser matching the current type
        if self.type == "transcript" and self.transcriptListParser is not None:
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes = self.transcriptListParser.getChromosomes()
        if self.type == "mapping" and self.mappingListParser is not None:
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes = self.mappingListParser.getChromosomes()

        self.foundData = True


    def _loadSqlTables(self):
        """
        Find the tables named "<container>_<chromosome>_transcripts", and count
        the transcripts and the nucleotides they contain
        Uses self.mySqlConnection, which is None after construction: it has to
        be set before the data is loaded.
        """
        self.transcriptTables = {}
        self.chromosomes = []
        self.nbTranscripts = 0
        self.nbNucleotides = 0
        self.type = "sql"
        # Skip the container name and the "_" which follows it
        prefixSize = len(self.container) + 1
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        for line in query.getIterator():
            tableName = line[0]
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _chooseParser(self):
        """
        Ask the parser chooser for the type of the format, and create the
        corresponding parser
        Exits the program (SystemExit) if the type is neither "transcript" nor
        "mapping".
        """
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def getNbTranscripts(self):
        """
        Get the number of transcripts
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts


    def getNbItems(self):
        """
        Same as getNbTranscripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes


    def getIterator(self):
        """
        An iterator
        This is a generator: the data is loaded, and the program possibly
        exited, when the iteration starts. In "mapping" mode, the transcript of
        each mapping is given.
        @return: an iterator to a list of transcripts
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                for transcript in self.transcriptTables[chromosome].getIterator():
                    yield transcript
        elif self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
        elif self.type == "mapping":
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
        else:
            sys.exit("Error! No valid transcript container given!")


    def storeIntoDatabase(self, name = None):
        """
        Store the current transcript / mapping list into database
        Does nothing if there is no parser, or if tables already exist.
        Afterwards, the type of the container is "sql".
        @param name: name given to the writer
        @type name: string or None
        """
        if not self.foundData:
            self.findData()

        # Identity tests: a parser object which evaluates to False is still a parser
        parser = self.transcriptListParser
        if parser is None:
            parser = self.mappingListParser
        if parser is None or self.transcriptTables:
            return

        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(parser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        self.type = "sql"


    def getTables(self):
        """
        Accessor to the mySQL tables
        @return: the mySQL tables
        """
        return self.transcriptTables


    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts
        @param name: name of the tag
        @type name: string
        @param value: value of the tag
        @type value: string
        """
        # Load the data first, like the other accessors: otherwise the type is
        # still unknown and the tag would be silently dropped
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)
236