Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/structure/TranscriptContainer.py @ 36:44d5973c188c
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 15:02:29 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
35:d94018ca4ada | 36:44d5973c188c |
---|---|
1 # | |
2 # Copyright INRA-URGI 2009-2010 | |
3 # | |
4 # This software is governed by the CeCILL license under French law and | |
5 # abiding by the rules of distribution of free software. You can use, | |
6 # modify and/ or redistribute the software under the terms of the CeCILL | |
7 # license as circulated by CEA, CNRS and INRIA at the following URL | |
8 # "http://www.cecill.info". | |
9 # | |
10 # As a counterpart to the access to the source code and rights to copy, | |
11 # modify and redistribute granted by the license, users are provided only | |
12 # with a limited warranty and the software's author, the holder of the | |
13 # economic rights, and the successive licensors have only limited | |
14 # liability. | |
15 # | |
16 # In this respect, the user's attention is drawn to the risks associated | |
17 # with loading, using, modifying and/or developing or reproducing the | |
18 # software by the user in light of its specific status of free software, | |
19 # that may mean that it is complicated to manipulate, and that also | |
20 # therefore means that it is reserved for developers and experienced | |
21 # professionals having in-depth computer knowledge. Users are therefore | |
22 # encouraged to load and test the software's suitability as regards their | |
23 # requirements in conditions enabling the security of their systems and/or | |
24 # data to be ensured and, more generally, to use and operate it in the | |
25 # same conditions as regards security. | |
26 # | |
27 # The fact that you are presently reading this means that you have had | |
28 # knowledge of the CeCILL license and that you accept its terms. | |
29 # | |
30 import re | |
31 import sys | |
32 from commons.core.parsing.ParserChooser import ParserChooser | |
33 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable | |
34 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter | |
35 | |
class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts, and handles different formats.
    The data is loaded lazily: the accessors call L{findData} on first use.
    @ivar container: container of the data (a file name, or a table prefix when the format is "sql")
    @type container: string
    @ivar format: format of the data
    @type format: string
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser: possibly contains a parser to a list of mappings
    @type mappingListParser: L{MapperParser<MapperParser>} or None
    @ivar transcriptTables: possibly contains the SQL tables, indexed by chromosome
    @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar mySqlConnection: connection to the database (not set by this class; must be attached by the caller)
    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>} or None
    @ivar foundData: whether L{findData} has already been run
    @type foundData: boolean
    @ivar nbTranscripts: number of transcripts (None until the data is loaded)
    @type nbTranscripts: int or None
    @ivar nbNucleotides: number of nucleotides (None until the data is loaded)
    @type nbNucleotides: int or None
    @ivar chromosomes: the chromosomes (None until the data is loaded)
    @type chromosomes: list or None
    @ivar type: type of the data ("transcript", "mapping" or "sql")
    @type type: string or None
    @ivar verbosity: verbosity
    @type verbosity: int
    """

    def __init__(self, container, format, verbosity = 0):
        """
        Constructor
        @param container: container of the data
        @type container: string
        @param format: format of the data
        @type format: string
        @param verbosity: verbosity
        @type verbosity: int
        @raise SystemExit: if the container or the format is None
        """
        self.container = container
        self.format = format
        self.verbosity = verbosity
        self.transcriptListParser = None
        self.mappingListParser = None
        self.transcriptTables = {}
        self.mySqlConnection = None
        self.foundData = False
        self.nbTranscripts = None
        self.nbNucleotides = None
        self.chromosomes = None
        self.type = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")


    def findData(self):
        """
        Load data: either register the SQL tables of the container, or
        build the parser matching the format, then collect the statistics
        (number of transcripts, number of nucleotides, chromosomes)
        @raise SystemExit: if the format is missing or cannot be handled,
            if an SQL table has an unexpected name, or if the format is
            "sql" and no database connection has been attached
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._findSqlData()
        if self.type is None:
            self._findParser()

        if self.transcriptListParser is not None and self.type == "transcript":
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes = self.transcriptListParser.getChromosomes()
        if self.mappingListParser is not None and self.type == "mapping":
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes = self.mappingListParser.getChromosomes()

        self.foundData = True


    def _findSqlData(self):
        """
        Register every '<container>_<chromosome>_transcripts' table and count its content
        """
        # Fail with an explicit message rather than an AttributeError on None.
        if self.mySqlConnection is None:
            sys.exit("Error! No database connection is set for container '%s'!" % (self.container))
        self.transcriptTables = {}
        self.chromosomes = []
        self.nbTranscripts = 0
        self.nbNucleotides = 0
        self.type = "sql"
        # NOTE(review): the container name is interpolated as is in the query,
        # so any '_' or '%' it contains acts as a LIKE wildcard -- confirm that
        # container names are trusted and wildcard-free.
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        prefixSize = len(self.container) + 1
        for line in query.getIterator():
            tableName = line[0]
            # Skip the '<container>_' prefix: what remains should be '<chromosome>_transcripts'.
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _findParser(self):
        """
        Choose the parser matching the format, and store it as transcript or mapping parser
        """
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            # Also covers a type left to None by the parser chooser.
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def getNbTranscripts(self):
        """
        Get the number of transcripts (load the data if needed)
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts


    def getNbItems(self):
        """
        Same as getNbTranscripts
        @return: the number of transcripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides (load the data if needed)
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes (load the data if needed)
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes


    def getIterator(self):
        """
        An iterator (this is a generator: nothing is loaded before the first item is requested)
        @return: an iterator to a list of transcripts
        @raise SystemExit: upon iteration, if the type of the data is unknown
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                for transcript in self.transcriptTables[chromosome].getIterator():
                    yield transcript
            return
        if self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
            return
        if self.type == "mapping":
            # Mappings are converted to transcripts on the fly.
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
            return
        sys.exit("Error! No valid transcript container given!")


    def storeIntoDatabase(self, name = None):
        """
        Store the current transcript / mapping list into database.
        Nothing is done if there is no parser, or if tables already exist.
        After the call, the container reads its data from the tables (type "sql").
        @param name: name given to the transcript writer
        @type name: string or None
        """
        if not self.foundData:
            self.findData()

        if self.transcriptListParser is not None:
            parser = self.transcriptListParser
        else:
            parser = self.mappingListParser
        # Nothing to store, or already stored.
        if parser is None or len(self.transcriptTables) != 0:
            return

        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(parser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        self.type = "sql"


    def getTables(self):
        """
        Accessor to the SQL tables (does not trigger the loading of the data)
        @return: the SQL tables
        """
        return self.transcriptTables


    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts (load the data if needed)
        @param name: name of the tag
        @type name: string
        @param value: value of the tag
        @type value: string
        """
        # Without this lazy load, a call made before any other accessor
        # would find an unset type and silently do nothing.
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)
236 |