annotate genetic_elements/aragorn/aragorn.py @ 25:f1255292e929

Aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 22:35:02 +0100
parents
children 9610ddbca991
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def delete_galaxy():
    """Remove every Galaxy distribution directory from ``sys.path``.

    Any search-path entry containing ``"galaxy-dist/"`` is dropped. The list
    object itself is kept and modified in place, so other references to
    ``sys.path`` observe the change.

    Entries are removed rather than replaced by an empty string: an empty
    string in ``sys.path`` stands for the current working directory, so
    blanking an entry would add a new import location instead of taking one
    away.
    """
    # Imported locally because this function is called before the
    # module-level imports have run.
    import sys

    sys.path[:] = [path for path in sys.path if "galaxy-dist/" not in path]


# Galaxy ships some of the modules that rdflib depends on, and those copies
# break the rdflib import. Purging Galaxy from the module search path before
# importing rdflib is not an elegant solution, but it works for now.
delete_galaxy()
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
9
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
11
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
12 # Import RDFLib's default Graph implementation.
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
13 from rdflib.graph import Graph
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
14
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
15 import sys, os
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
16
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
17 import rdflib
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
18 import subprocess
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
19 import hashlib
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
# Base IRI under which every resource created by this script is minted.
URI = "http://csb.wur.nl/genome/"

# rdflib namespace built on URI; coreURI["name"] yields URI + "name".
coreURI = Namespace(URI)

# Class names (e.g. "Trna") recorded by aragorn() and read by
# subClassOfBuilder(). Only the keys are used.
SubClassOfDict = {}

# Prefixed name of the rdfs:seeAlso property.
seeAlso = "rdfs:seeAlso"
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
29
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def createClass(uri):
    """Return ``uri`` unchanged.

    This is currently a pass-through: no triples are added to the graph
    here. The class-hierarchy statements are written by
    ``subClassOfBuilder`` instead.
    """
    class_uri = uri
    return class_uri
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
37
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def tmp():
    """Create a private scratch directory and publish it as ``tmpFolder``.

    The directory is created with ``tempfile.mkdtemp``, which picks an
    unused name and creates the directory readable and writable only by the
    current user. This avoids the name collisions that a timestamp-based
    path can run into.

    Returns:
        The directory path. It ends with a path separator, so file names can
        be appended to it by plain string concatenation. The same value is
        stored in the module-level ``tmpFolder``.
    """
    import tempfile

    global tmpFolder
    # Joining with "" appends the trailing separator that callers rely on.
    tmpFolder = os.path.join(tempfile.mkdtemp(prefix="aragorn_"), "")
    return tmpFolder
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
43
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def query():
    """Load the input graph and collect the sequence of every DNA object.

    The Turtle file named by ``sys.argv[1]`` is parsed into a fresh graph,
    which is stored in the module-level ``genomeGraph``. Each subject typed
    ``ssb:DnaObject`` that has an ``ssb:sequence`` value contributes one
    record, and its IRI is echoed to standard output.

    Returns:
        A list of two-item lists ``[">" + subject IRI, sequence]`` with
        surrounding whitespace stripped from the sequence.
    """
    # NOTE(review): the query uses the "ssb:" prefix without declaring it;
    # presumably it is bound by the parsed Turtle file - confirm.
    sparql = 'select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}'

    global genomeGraph
    genomeGraph = Graph()
    genomeGraph.parse(sys.argv[1], format="turtle")

    records = []
    for hit in genomeGraph.query(sparql):
        subject = hit[0]
        residues = hit[1]
        print ("Header:",subject)
        records.append([">" + str(subject), str(residues.strip())])
    return records
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
56
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def _parse_aragorn_header(line):
    """Parse one ``>`` header line from the ARAGORN output.

    Two layouts are recognised once any ``(Permuted)`` marker token has
    been removed:

    * three tokens - a leading token followed by gene name and position;
    * two tokens - gene name (carrying the ``>``) and position.

    The position token looks like ``[a,b]`` or ``c[a,b]``. With the ``c``
    prefix the two numbers are swapped, so that ``start`` is the second
    number and ``stop`` the first.

    Args:
        line: A single line of ARAGORN output.

    Returns:
        ``(trna, codon, start, stop)`` as strings, where ``codon`` is the
        text between the parentheses of a ``tRNA-`` name and ``""``
        otherwise. ``None`` is returned when the line matches neither
        layout or its position token is malformed.
    """
    # "(Permuted)" is an optional extra token between name and position;
    # dropping it lets both layouts below cover the permuted case as well.
    tokens = [token for token in line.split() if token != "(Permuted)"]
    if len(tokens) == 3:
        name, pos = tokens[1], tokens[2]
    elif len(tokens) == 2:
        name, pos = tokens
    else:
        return None

    if "[" not in pos or "]" not in pos:
        return None
    coordinates = pos.split("[", 1)[1].split("]", 1)[0].split(",")
    if len(coordinates) != 2:
        return None
    first, second = coordinates
    if pos.startswith("c"):
        # Complementary strand: the reported pair is (stop, start).
        start, stop = second, first
    else:
        start, stop = first, second

    if "tRNA-" in line:
        # ">tRNA-Ala(tgc)" -> name "tRNA-Ala", codon "tgc"
        name, _, codon = name.strip(">)").partition("(")
    else:
        # Anything else is treated as a tmRNA, which carries no codon.
        name = name.strip(">").strip()
        codon = ""
    return name, codon, start, stop


def aragorn(sequences):
    """Run ARAGORN on each sequence and add the hits to ``genomeGraph``.

    For every record the sequence is written to ``tmp.seq`` inside
    ``tmpFolder`` and the bundled ARAGORN binary is run on it with
    ``-fasta`` plus the pass-through options taken from ``sys.argv[3:-2]``.
    Its standard output is captured in ``aragorn.output``. Every output
    line containing ``>`` that parses as a gene header becomes one feature
    resource attached to the contig. Header lines that cannot be parsed are
    skipped.

    The binary is started directly (no shell). The pass-through options are
    split on whitespace only, so shell metacharacters in them are not
    interpreted.

    Args:
        sequences: Iterable of ``[header, sequence]`` pairs as returned by
            ``query()``; ``header`` is ``">"`` followed by the contig IRI.

    Raises:
        OSError: If the ARAGORN binary cannot be started.
        ValueError: If a feature is found but ``-sourcedb`` is missing from
            the command line.
    """
    # The binary lives in <grandparent of this file>/tools/aragorn1.2.36/.
    base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    binary = os.path.join(base, "tools", "aragorn1.2.36", "aragorn")
    # Same whitespace tokenisation the former shell command line produced.
    extra_args = ' '.join(sys.argv[3:-2]).split()

    for sequence in sequences:
        # ARAGORN is called once per contig, for ease of parsing.
        fasta_path = tmpFolder + "tmp.seq"
        output_path = tmpFolder + "aragorn.output"
        with open(fasta_path, "w") as handle:
            handle.write('\n'.join(sequence))

        cmd = [binary, "-fasta", fasta_path] + extra_args
        print(" ".join(cmd) + " > " + output_path)
        with open(output_path, "w") as handle:
            status = subprocess.call(cmd, stdout=handle)
        if status != 0:
            # Best effort, as before: report the failure and carry on with
            # whatever output was produced.
            print("aragorn exited with status " + str(status))

        with open(output_path) as handle:
            report = handle.readlines()

        contig = sequence[0].strip(">").replace(URI, "")
        dnaobjectURI = coreURI[contig]

        for line in report:
            if ">" not in line:
                continue
            print (line.split())
            parsed = _parse_aragorn_header(line)
            if parsed is None:
                # Not a gene header; never reuse values of a previous line.
                continue
            trna, codon, start, stop = parsed

            kind = trna.split("-")[0].title()  # "Trna" or "Tmrna"
            trnaClass = createClass(coreURI[kind])
            SubClassOfDict[kind] = 1

            trnaURI = coreURI[contig + "/trna-aragorn_1_2_36-" + trna.lower() + "/" + start + "_" + stop]
            genomeGraph.add((dnaobjectURI, coreURI["feature"], trnaURI))
            genomeGraph.add((trnaURI, RDF.type, trnaClass))
            genomeGraph.add((trnaURI, coreURI["begin"], Literal(start, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["end"], Literal(stop, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["trna_type"], Literal(trna)))
            genomeGraph.add((trnaURI, coreURI["trna_anti"], Literal(codon)))
            genomeGraph.add((trnaURI, coreURI["tool"], Literal("aragorn")))
            genomeGraph.add((trnaURI, coreURI["version"], Literal("1.2.36")))
            genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb") + 1])))
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
104
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def subClassOfBuilder():
    """Add the RNA class hierarchy to ``genomeGraph``.

    For every class name recorded in ``SubClassOfDict`` four statements are
    added: ``Feature`` is a subclass of ``owl:Thing``, ``Rna`` is a subclass
    of ``Feature``, the recorded class is a subclass of ``Rna``, and ``Rna``
    is an ``owl:Class``. Nothing is added when the dictionary is empty.
    """
    feature = coreURI["Feature"]
    rna = coreURI["Rna"]
    for class_name in SubClassOfDict:
        statements = (
            (feature, RDFS.subClassOf, OWL.Thing),
            (rna, RDFS.subClassOf, feature),
            (coreURI[class_name], RDFS.subClassOf, rna),
            (rna, RDF.type, OWL.Class),
        )
        for statement in statements:
            genomeGraph.add(statement)
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
111
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def save():
    """Write ``genomeGraph`` as Turtle to the file named by ``sys.argv[2]``.

    The output file is opened in binary mode and closed before returning.
    """
    # The class hierarchy (subClassOfBuilder) is deliberately not written
    # here; that call is disabled.
    data = genomeGraph.serialize(format='turtle')
    # Depending on the rdflib version, serialize() without a destination
    # returns either bytes or text; a binary file only accepts bytes.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
118
f1255292e929 Aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def main():
    """Run the whole pipeline: scratch directory, query, ARAGORN, save.

    The scratch directory created by ``tmp()`` is removed again when the
    pipeline finishes, whether it succeeded or raised.
    """
    import shutil

    tmp()
    try:
        sequences = query()
        aragorn(sequences)
        save()
    finally:
        # Best-effort cleanup; a failure to delete must not mask the
        # pipeline's own outcome.
        shutil.rmtree(tmpFolder, ignore_errors=True)


# The pipeline runs as soon as this file is executed.
main()