annotate genetic_elements/aragorn/aragorn.py @ 27:875035bbe366

no message
author Jasper Koehorst <jasperkoehorst@gmail.com>
date Wed, 25 Feb 2015 08:16:43 +0100
parents 9610ddbca991
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
jjkoehorst <jasperkoehorst@gmail.com>
parents: 17
diff changeset
1
17
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def delete_galaxy():
    """Remove every Galaxy distribution directory from ``sys.path``.

    Any search-path entry containing ``"galaxy-dist/"`` is dropped. The
    entries are removed rather than blanked out, because an empty string in
    ``sys.path`` means "the current working directory" and would make Python
    search that directory at the position of every removed entry.
    """
    import sys

    # Slice assignment edits the existing list object, so code that already
    # holds a reference to sys.path observes the change as well.
    sys.path[:] = [entry for entry in sys.path if "galaxy-dist/" not in entry]


# Some modules that are required by RDFLib are also in Galaxy, which messes up
# the RDF import. This is not an elegant solution, but it works for now.
# It has to run before rdflib is imported.
delete_galaxy()
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
10
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
11 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
12
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
13 # Import RDFLib's default Graph implementation.
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
14 from rdflib.graph import Graph
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
15
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
16 import sys, os
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
17
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
18 import rdflib
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
19 import subprocess
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
20 import hashlib
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
# Module-level state shared by the functions below. Names assigned at module
# scope are already global, so no ``global`` statements are needed here.

# Registry of class names seen while parsing tool output: aragorn() stores
# each name as a key (the value is a dummy 1) and subClassOfBuilder() iterates
# over the keys.
SubClassOfDict = {}

# Base URI of the genome namespace; coreURI builds resource URIs from it.
URI = "http://csb.wur.nl/genome/"
seeAlso = "rdfs:seeAlso"
coreURI = Namespace(URI)
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
30
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def createClass(uri):
    """Return *uri* unchanged.

    This is a pass-through hook: it adds nothing to the graph. The class
    hierarchy statements (Feature -> Rna -> concrete RNA class) are emitted by
    subClassOfBuilder() instead.

    :param uri: URI of the class an annotated feature is typed with.
    :return: the same *uri* object.
    """
    return uri
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
38
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def tmp():
    """Create a private scratch directory and publish it as ``tmpFolder``.

    The directory is created with ``tempfile.mkdtemp``, which picks an unused
    name atomically and makes the directory accessible to the current user
    only. A timestamp-based name under ``/tmp`` is predictable and can collide
    with another job. Note that ``mkdtemp`` honours the platform's temporary
    directory settings instead of always using ``/tmp``.

    :return: the directory path, always ending in ``"/"`` because callers
        build file names by plain string concatenation.
    """
    import tempfile

    global tmpFolder
    tmpFolder = tempfile.mkdtemp(prefix="aragorn_") + "/"
    return tmpFolder
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
44
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def query():
    """Load the genome graph and return its DNA sequences as FASTA records.

    Parses the Turtle file named by ``sys.argv[1]`` into a fresh graph, which
    is published as the module-level ``genomeGraph`` for the later steps, and
    selects every ``ssb:DnaObject`` together with its ``ssb:sequence``.

    :return: a list of ``[header, sequence]`` pairs, where ``header`` is
        ``">"`` followed by the object's URI and ``sequence`` is the sequence
        text with surrounding whitespace removed.
    """
    global genomeGraph
    genomeGraph = Graph()
    genomeGraph.parse(sys.argv[1], format="turtle")
    # The query declares no PREFIX itself; "ssb:" is presumably resolved from
    # the prefixes bound while parsing the input file -- TODO confirm.
    rows = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
    sequences = []
    for row in rows:
        print ("Header:",row[0])
        sequences.append([">" + str(row[0]), str(row[1].strip())])
    return sequences
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
57
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def _parse_aragorn_header(line):
    """Split one tool output header into ``(trna, codon, start, stop)``.

    Returns ``None`` when *line* does not have the shape of a gene header, so
    the caller can skip it instead of reusing values left over from a
    previous line.
    """
    # "(Permuted)" is an optional marker token, not part of the name/position.
    tokens = [token for token in line.split() if token != "(Permuted)"]
    if len(tokens) == 2:
        trna, pos = tokens
    elif len(tokens) == 3:
        # The leading token is discarded (presumably a gene-numbering label --
        # confirm against real ARAGORN output).
        trna, pos = tokens[1:]
    else:
        return None

    # Position looks like "[begin,end]" or "c[begin,end]".
    if "[" not in pos or "]" not in pos:
        return None
    coords = pos.split("[")[1].split("]")[0].split(",")
    if len(coords) != 2:
        return None
    if "c" in pos[0]: #complementary
        stop, start = coords
    else:
        start, stop = coords

    if "tRNA-" in trna:
        # "tRNA-Xxx(yyy)" -> name "tRNA-Xxx", anticodon "yyy".
        trna, _, codon = trna.strip(">)").partition("(")
    else:
        trna = trna.strip(">").strip() #Actually a tmRNA...
        codon = ''
    return trna, codon, start, stop


def aragorn(sequences):
    """Run ARAGORN on every sequence and add the reported RNAs to the graph.

    For each ``[header, sequence]`` pair the record is written to
    ``tmpFolder``, the bundled ``aragorn`` binary is run on it with the extra
    options taken from ``sys.argv[3:-2]``, and every header line of its output
    is turned into a feature of the contig in ``genomeGraph`` (type, begin,
    end, tRNA type, anticodon, tool, version and source database).

    The tool is started without a shell, so the command-line options cannot
    inject shell syntax; they are still split on whitespace the way a shell
    would split them. Output lines that cannot be parsed are skipped.

    :param sequences: list of ``[">" + contig URI, sequence]`` pairs as
        returned by query().
    :raises ValueError: if a feature is found and ``-sourcedb`` is missing
        from ``sys.argv``.
    """
    import shlex

    tool_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    binary = os.path.join(tool_root, "tools", "aragorn1.2.36", "aragorn")
    extra_args = shlex.split(' '.join(sys.argv[3:-2]))

    for sequence in sequences:
        #Call aragorn for each contig, for ease of parsing
        seq_path = tmpFolder + "tmp.seq"
        out_path = tmpFolder + "aragorn.output"
        with open(seq_path, "w") as handle:
            handle.write('\n'.join(sequence))

        command = [binary, "-fasta", seq_path] + extra_args
        print (' '.join(command) + " > " + out_path)
        with open(out_path, "w") as handle:
            try:
                subprocess.call(command, stdout=handle)
            except OSError as error:
                # Best effort: a tool that cannot be started yields an empty
                # report for this contig rather than aborting the whole run.
                print ("aragorn could not be started:", error)
        with open(out_path) as handle:
            report = handle.readlines()

        contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
        dnaobjectURI = coreURI[contig]
        for line in report:
            if ">" not in line:
                continue
            print (line.split())
            parsed = _parse_aragorn_header(line)
            if parsed is None:
                continue
            trna, codon, start, stop = parsed

            class_name = trna.split("-")[0].title() #trna or tmrna
            trnaClass = createClass(coreURI[class_name])
            SubClassOfDict[class_name] = 1
            trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
            genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
            genomeGraph.add((trnaURI, RDF.type,trnaClass))
            genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
            genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
            genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
            genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
            genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
105
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def subClassOfBuilder():
    """Add the class hierarchy for every RNA class recorded in SubClassOfDict.

    Declares ``Feature`` as a subclass of ``owl:Thing``, ``Rna`` as a subclass
    of ``Feature`` and as an ``owl:Class``, and each recorded class as a
    subclass of ``Rna``. Nothing is added when no class has been recorded.
    """
    if not SubClassOfDict:
        return

    # These triples do not depend on the individual subclass, so they are
    # added once instead of once per loop iteration.
    genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
    genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
    genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))
    for subclass in SubClassOfDict:
        genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
112
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def save():
    """Serialize ``genomeGraph`` as Turtle into the file named by ``sys.argv[2]``.

    The file is opened in binary mode. Depending on the rdflib version,
    ``serialize`` returns either bytes or text, so text is encoded as UTF-8
    before it is written.
    """
    # NOTE: subClassOfBuilder() is intentionally not called here, so the class
    # hierarchy triples are not part of the saved file.
    data = genomeGraph.serialize(format='turtle')
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
119
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def main():
    """Run the pipeline: scratch directory, load graph, annotate, save.

    The scratch directory created by tmp() is removed again when the run
    finishes, whether it succeeded or raised.
    """
    import shutil

    tmp()
    try:
        sequences = query()
        aragorn(sequences)
        save()
    finally:
        shutil.rmtree(tmpFolder, ignore_errors=True)


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()