comparison genetic_elements/aragorn/aragorn.py @ 17:2561c51e6605

aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 17:20:05 +0100
parents
children 9610ddbca991
comparison
equal deleted inserted replaced
16:74b8ba5e2d5b 17:2561c51e6605
1 def delete_galaxy():
2 import sys
3 for index, path in enumerate(sys.path):
4 if "galaxy-dist/" in path:
5 sys.path[index] = ''
6
7 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
8 delete_galaxy()
9
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
11
12 # Import RDFLib's default Graph implementation.
13 from rdflib.graph import Graph
14
15 import sys, os
16
17 import rdflib
18 import subprocess
19 import hashlib
20 global URI
21 global SubClassOfDict
22 SubClassOfDict = {}
23
24 URI = "http://csb.wur.nl/genome/"
25 global seeAlso
26 seeAlso = "rdfs:seeAlso"
27 global coreURI
28 coreURI = Namespace(URI)
29
30 def createClass(uri):
31 #genomeGraph.add((uri,RDF.type,OWL.Class))
32 #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
33 #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
34 #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
35 #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
36 return uri
37
38 def tmp():
39 import time
40 global tmpFolder
41 tmpFolder = "/tmp/"+str(time.time())+"/"
42 os.mkdir(tmpFolder)
43
44 def query():
45 global genomeGraph
46 genomeGraph = Graph()
47 filename = sys.argv[1]
48 genomeGraph.parse(filename, format="turtle")
49 qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
50 sequences = []
51 for row in qres:
52 print ("Header:",row[0])
53 sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","")
54
55 return sequences
56
57 def aragorn(sequences):
58 for sequence in sequences:
59 #Call aragorn for each contig, for ease of parsing
60 open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))
61 folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"
62 cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"
63 print (cmd)
64 os.system(cmd)
65 aragorn = open(tmpFolder+"aragorn.output").readlines()
66 # string = ''.join(aragorn)
67
68 contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
69 dnaobjectURI = coreURI[contig]
70 #print (contig)
71 for line in aragorn:
72 if ">" in line:
73 print (line.split())
74 try:
75 trna, pos = line.split()[1:]
76 except:
77 try:
78 trna, pos = line.split()
79 except:
80 if "(Permuted)" in line:
81 trna, permute, pos = line.split()[1:]
82
83 if "tRNA-" in line:
84 trna, codon = (trna.strip(">)").split("(",1))
85 else:
86 trna = trna.strip(">").strip() #Actually a tmRNA...
87 codon = ''
88 trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
89 SubClassOfDict[trna.split("-")[0].title()] = 1
90 if "c" in pos[0]: #complementary
91 stop, start = pos.split("[")[1].split("]")[0].split(",")
92 else:
93 start, stop = pos.split("[")[1].split("]")[0].split(",")
94 trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
95 genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
96 genomeGraph.add((trnaURI, RDF.type,trnaClass))
97 genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
98 genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
99 genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
100 genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
101 genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
102 genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
103 genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
104
105 def subClassOfBuilder():
106 for subclass in SubClassOfDict:
107 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
108 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
109 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
110 genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))
111
112 def save():
113 #Create the subclass off instances
114 #subClassOfBuilder()
115 ## Saves the file
116 data = genomeGraph.serialize(format='turtle')
117 open(sys.argv[2],"wb").write(data)
118
119 def main():
120 tmp()
121 sequences = query()
122 aragorn(sequences)
123 save()
124
125 main()