diff gbk2rdf/gbktordf.py @ 6:ec73c34af97b

FASTA2RDF
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 15:19:42 +0100
parents db04e12b8779
children
line wrap: on
line diff
--- a/gbk2rdf/gbktordf.py	Sat Feb 21 13:49:11 2015 +0100
+++ b/gbk2rdf/gbktordf.py	Sat Feb 21 15:19:42 2015 +0100
@@ -108,8 +108,12 @@
 					gi = record.annotations["gi"]
 					typ = str(gi)
 				except:
-					scaf_value += 1
-					typ = "scaffold_"+str(scaf_value)
+					try:
+						gi = record.annotations["accessions"][0]
+						typ = str(gi)
+					except:
+						scaf_value += 1
+						typ = "scaffold_"+str(scaf_value)
 				genomeURI = coreURI[genome]
 				gbkURI = coreURI[genome + "/" + typ]
 				#To contig connection to connect all data to it
@@ -148,8 +152,7 @@
 								int_add(gbkURI,coreURI[annot.lower()],str(a))
 					else:
 						int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot]))
-
-
+						
 				#####END of RECORD####
 				if len(sequence) > 0:
 					genomeGraph.add((gbkURI, coreURI["sequence"] ,  Literal(sequence)))
@@ -167,13 +170,6 @@
 
 				if strand == 'None':
 					strand = 0
-
-# 				if feature_type == "gene":
-# 					gene = feature
-					#Store gene in next feature....
-# 					gene_location_start = end = str(gene.location.end).replace(">","").replace("<","")
-# 					gene_location_stop = str(gene.location.start).replace(">","").replace("<","")
-# 					gene_qualifiers = gene.qualifiers	
 				else:
 					if feature.type == "misc_feature": #Store as part of previous cds or something...
 						if strand == "-1":
@@ -181,8 +177,6 @@
 						else:
 							miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)]
 						
-						# genomeGraph.add((generalURI,coreURI["subFeature"],miscURI))
-
 						# TODO: Check if biopython has an overlap function...
 						if int(prevObjStart) <= int(start):
 							if int(end) <= int(prevObjStop):
@@ -201,15 +195,12 @@
 						prevObjStart = start
 						prevObjStop = end
 						
-						
 						if strand == "-1":
 							typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)]
 						else:
 							typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)]
 
-# 						cds_sequence = str(feature.extract(sequence))
-						#Contig specific connection
-						
+						#Contig specific connection						
 						genomeGraph.add((gbkURI, coreURI["feature"] , typeURI))
 						############################
 
@@ -228,6 +219,7 @@
 							genomeGraph.add((typeURI, coreURI["feature"] , subURI))
 							store_general_information(subURI,subfeature,record,feature)
 
+
 def store_general_information(generalURI,feature,record,superfeature=""):
 	proteinClass = createClass(coreURI["Protein"], root=True)
 	sequence = str(record.seq)
@@ -277,8 +269,6 @@
 			#And subfeature variable will contain the superfeature
 			if superfeature:
 				codon = superfeature.qualifiers["transl_table"][0]
-# 			else:
-# 				codon = subfeature.qualifiers["transl_table"][0]
 		except:
 			#Default codon table 11
 			codon = "11"
@@ -356,7 +346,6 @@
 		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
 		genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
 		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
-		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
 		genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class))
 
 def main():