Mercurial > repos > jjkoehorst > sapp
diff gbk2rdf/gbktordf.py @ 6:ec73c34af97b
FASTA2RDF
author | jjkoehorst <jasperkoehorst@gmail.com> |
---|---|
date | Sat, 21 Feb 2015 15:19:42 +0100 |
parents | db04e12b8779 |
children |
line wrap: on
line diff
--- a/gbk2rdf/gbktordf.py Sat Feb 21 13:49:11 2015 +0100 +++ b/gbk2rdf/gbktordf.py Sat Feb 21 15:19:42 2015 +0100 @@ -108,8 +108,12 @@ gi = record.annotations["gi"] typ = str(gi) except: - scaf_value += 1 - typ = "scaffold_"+str(scaf_value) + try: + gi = record.annotations["accessions"][0] + typ = str(gi) + except: + scaf_value += 1 + typ = "scaffold_"+str(scaf_value) genomeURI = coreURI[genome] gbkURI = coreURI[genome + "/" + typ] #To contig connection to connect all data to it @@ -148,8 +152,7 @@ int_add(gbkURI,coreURI[annot.lower()],str(a)) else: int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot])) - - + #####END of RECORD#### if len(sequence) > 0: genomeGraph.add((gbkURI, coreURI["sequence"] , Literal(sequence))) @@ -167,13 +170,6 @@ if strand == 'None': strand = 0 - -# if feature_type == "gene": -# gene = feature - #Store gene in next feature.... -# gene_location_start = end = str(gene.location.end).replace(">","").replace("<","") -# gene_location_stop = str(gene.location.start).replace(">","").replace("<","") -# gene_qualifiers = gene.qualifiers else: if feature.type == "misc_feature": #Store as part of previous cds or something... if strand == "-1": @@ -181,8 +177,6 @@ else: miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)] - # genomeGraph.add((generalURI,coreURI["subFeature"],miscURI)) - # TODO: Check if biopython has an overlap function... if int(prevObjStart) <= int(start): if int(end) <= int(prevObjStop): @@ -201,15 +195,12 @@ prevObjStart = start prevObjStop = end - if strand == "-1": typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)] else: typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)] -# cds_sequence = str(feature.extract(sequence)) - #Contig specific connection - + #Contig specific connection genomeGraph.add((gbkURI, coreURI["feature"] , typeURI)) ############################ @@ -228,6 +219,7 @@ genomeGraph.add((typeURI, coreURI["feature"] , subURI)) store_general_information(subURI,subfeature,record,feature) + def store_general_information(generalURI,feature,record,superfeature=""): proteinClass = createClass(coreURI["Protein"], root=True) sequence = str(record.seq) @@ -277,8 +269,6 @@ #And subfeature variable will contain the superfeature if superfeature: codon = superfeature.qualifiers["transl_table"][0] -# else: -# codon = subfeature.qualifiers["transl_table"][0] except: #Default codon table 11 codon = "11" @@ -356,7 +346,6 @@ genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) - genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class)) def main():