Mercurial > repos > jjkoehorst > sapp
comparison gbk2rdf/gbktordf.py @ 7:c79025539d9b
FASTA to RDF
author | jjkoehorst <jasperkoehorst@gmail.com> |
---|---|
date | Sat, 21 Feb 2015 15:23:15 +0100 |
parents | ec73c34af97b |
children |
comparison
equal
deleted
inserted
replaced
5:e159dbecdad6 | 7:c79025539d9b |
---|---|
106 | 106 |
107 try: | 107 try: |
108 gi = record.annotations["gi"] | 108 gi = record.annotations["gi"] |
109 typ = str(gi) | 109 typ = str(gi) |
110 except: | 110 except: |
111 scaf_value += 1 | 111 try: |
112 typ = "scaffold_"+str(scaf_value) | 112 gi = record.annotations["accessions"][0] |
113 typ = str(gi) | |
114 except: | |
115 scaf_value += 1 | |
116 typ = "scaffold_"+str(scaf_value) | |
113 genomeURI = coreURI[genome] | 117 genomeURI = coreURI[genome] |
114 gbkURI = coreURI[genome + "/" + typ] | 118 gbkURI = coreURI[genome + "/" + typ] |
115 #To contig connection to connect all data to it | 119 #To contig connection to connect all data to it |
116 genomeGraph.add((genomeURI, coreURI["dnaobject"] , gbkURI)) | 120 genomeGraph.add((genomeURI, coreURI["dnaobject"] , gbkURI)) |
117 | 121 |
146 else: | 150 else: |
147 for a in record.annotations[annot]: | 151 for a in record.annotations[annot]: |
148 int_add(gbkURI,coreURI[annot.lower()],str(a)) | 152 int_add(gbkURI,coreURI[annot.lower()],str(a)) |
149 else: | 153 else: |
150 int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot])) | 154 int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot])) |
151 | 155 |
152 | |
153 #####END of RECORD#### | 156 #####END of RECORD#### |
154 if len(sequence) > 0: | 157 if len(sequence) > 0: |
155 genomeGraph.add((gbkURI, coreURI["sequence"] , Literal(sequence))) | 158 genomeGraph.add((gbkURI, coreURI["sequence"] , Literal(sequence))) |
156 genomeGraph.add((genomeURI, RDF.type,genomeClass)) | 159 genomeGraph.add((genomeURI, RDF.type,genomeClass)) |
157 genomeGraph.add((gbkURI, RDF.type,typeClass)) | 160 genomeGraph.add((gbkURI, RDF.type,typeClass)) |
165 | 168 |
166 strand = str(feature.location.strand) | 169 strand = str(feature.location.strand) |
167 | 170 |
168 if strand == 'None': | 171 if strand == 'None': |
169 strand = 0 | 172 strand = 0 |
170 | |
171 # if feature_type == "gene": | |
172 # gene = feature | |
173 #Store gene in next feature.... | |
174 # gene_location_start = end = str(gene.location.end).replace(">","").replace("<","") | |
175 # gene_location_stop = str(gene.location.start).replace(">","").replace("<","") | |
176 # gene_qualifiers = gene.qualifiers | |
177 else: | 173 else: |
178 if feature.type == "misc_feature": #Store as part of previous cds or something... | 174 if feature.type == "misc_feature": #Store as part of previous cds or something... |
179 if strand == "-1": | 175 if strand == "-1": |
180 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(end)+"_"+str(start)] | 176 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(end)+"_"+str(start)] |
181 else: | 177 else: |
182 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)] | 178 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)] |
183 | 179 |
184 # genomeGraph.add((generalURI,coreURI["subFeature"],miscURI)) | |
185 | |
186 # TODO: Check if biopython has an overlap function... | 180 # TODO: Check if biopython has an overlap function... |
187 if int(prevObjStart) <= int(start): | 181 if int(prevObjStart) <= int(start): |
188 if int(end) <= int(prevObjStop): | 182 if int(end) <= int(prevObjStop): |
189 pass | 183 pass |
190 # genomeGraph.add((typeURI,coreURI["feature"],miscURI)) | 184 # genomeGraph.add((typeURI,coreURI["feature"],miscURI)) |
199 store_general_information(miscURI,feature,record) | 193 store_general_information(miscURI,feature,record) |
200 else: | 194 else: |
201 prevObjStart = start | 195 prevObjStart = start |
202 prevObjStop = end | 196 prevObjStop = end |
203 | 197 |
204 | |
205 if strand == "-1": | 198 if strand == "-1": |
206 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)] | 199 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)] |
207 else: | 200 else: |
208 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)] | 201 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)] |
209 | 202 |
210 # cds_sequence = str(feature.extract(sequence)) | 203 #Contig specific connection |
211 #Contig specific connection | |
212 | |
213 genomeGraph.add((gbkURI, coreURI["feature"] , typeURI)) | 204 genomeGraph.add((gbkURI, coreURI["feature"] , typeURI)) |
214 ############################ | 205 ############################ |
215 | 206 |
216 store_general_information(typeURI,feature,record) | 207 store_general_information(typeURI,feature,record) |
217 | 208 |
225 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(end)+"_"+str(start)] | 216 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(end)+"_"+str(start)] |
226 else: | 217 else: |
227 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(start)+"_"+str(end)] | 218 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(start)+"_"+str(end)] |
228 genomeGraph.add((typeURI, coreURI["feature"] , subURI)) | 219 genomeGraph.add((typeURI, coreURI["feature"] , subURI)) |
229 store_general_information(subURI,subfeature,record,feature) | 220 store_general_information(subURI,subfeature,record,feature) |
221 | |
230 | 222 |
231 def store_general_information(generalURI,feature,record,superfeature=""): | 223 def store_general_information(generalURI,feature,record,superfeature=""): |
232 proteinClass = createClass(coreURI["Protein"], root=True) | 224 proteinClass = createClass(coreURI["Protein"], root=True) |
233 sequence = str(record.seq) | 225 sequence = str(record.seq) |
234 cds_sequence = str(feature.extract(sequence)) | 226 cds_sequence = str(feature.extract(sequence)) |
275 #Feature is normally submitted to this function | 267 #Feature is normally submitted to this function |
276 #IF a subfeature is submitted it is submitted as a feature | 268 #IF a subfeature is submitted it is submitted as a feature |
277 #And subfeature variable will contain the superfeature | 269 #And subfeature variable will contain the superfeature |
278 if superfeature: | 270 if superfeature: |
279 codon = superfeature.qualifiers["transl_table"][0] | 271 codon = superfeature.qualifiers["transl_table"][0] |
280 # else: | |
281 # codon = subfeature.qualifiers["transl_table"][0] | |
282 except: | 272 except: |
283 #Default codon table 11 | 273 #Default codon table 11 |
284 codon = "11" | 274 codon = "11" |
285 #Protein linkage | 275 #Protein linkage |
286 translation = "" | 276 translation = "" |
354 def subClassOfBuilderRna(): | 344 def subClassOfBuilderRna(): |
355 for subclass in SubClassOfDictRna: | 345 for subclass in SubClassOfDictRna: |
356 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) | 346 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) |
357 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) | 347 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) |
358 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) | 348 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) |
359 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) | |
360 genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class)) | 349 genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class)) |
361 | 350 |
362 def main(): | 351 def main(): |
363 tmp() | 352 tmp() |
364 gbk_parser() | 353 gbk_parser() |