comparison genetic_elements/aragorn/aragorn.py @ 27:875035bbe366

no message
author Jasper Koehorst <jasperkoehorst@gmail.com>
date Wed, 25 Feb 2015 08:16:43 +0100
parents 9610ddbca991
children
comparison
equal deleted inserted replaced
26:6a858e304888 27:875035bbe366
1
2 def delete_galaxy():
3 import sys
4 for index, path in enumerate(sys.path):
5 if "galaxy-dist/" in path:
6 sys.path[index] = ''
7
8 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
9 delete_galaxy()
10
11 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
12
13 # Import RDFLib's default Graph implementation.
14 from rdflib.graph import Graph
15
16 import sys, os
17
18 import rdflib
19 import subprocess
20 import hashlib
21 global URI
22 global SubClassOfDict
23 SubClassOfDict = {}
24
25 URI = "http://csb.wur.nl/genome/"
26 global seeAlso
27 seeAlso = "rdfs:seeAlso"
28 global coreURI
29 coreURI = Namespace(URI)
30
31 def createClass(uri):
32 #genomeGraph.add((uri,RDF.type,OWL.Class))
33 #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
34 #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
35 #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
36 #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
37 return uri
38
39 def tmp():
40 import time
41 global tmpFolder
42 tmpFolder = "/tmp/"+str(time.time())+"/"
43 os.mkdir(tmpFolder)
44
45 def query():
46 global genomeGraph
47 genomeGraph = Graph()
48 filename = sys.argv[1]
49 genomeGraph.parse(filename, format="turtle")
50 qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
51 sequences = []
52 for row in qres:
53 print ("Header:",row[0])
54 sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","")
55
56 return sequences
57
58 def aragorn(sequences):
59 for sequence in sequences:
60 #Call aragorn for each contig, for ease of parsing
61 open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))
62 folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"
63 cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"
64 print (cmd)
65 os.system(cmd)
66 aragorn = open(tmpFolder+"aragorn.output").readlines()
67 # string = ''.join(aragorn)
68
69 contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
70 dnaobjectURI = coreURI[contig]
71 #print (contig)
72 for line in aragorn:
73 if ">" in line:
74 print (line.split())
75 try:
76 trna, pos = line.split()[1:]
77 except:
78 try:
79 trna, pos = line.split()
80 except:
81 if "(Permuted)" in line:
82 trna, permute, pos = line.split()[1:]
83
84 if "tRNA-" in line:
85 trna, codon = (trna.strip(">)").split("(",1))
86 else:
87 trna = trna.strip(">").strip() #Actually a tmRNA...
88 codon = ''
89 trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
90 SubClassOfDict[trna.split("-")[0].title()] = 1
91 if "c" in pos[0]: #complementary
92 stop, start = pos.split("[")[1].split("]")[0].split(",")
93 else:
94 start, stop = pos.split("[")[1].split("]")[0].split(",")
95 trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
96 genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
97 genomeGraph.add((trnaURI, RDF.type,trnaClass))
98 genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
99 genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
100 genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
101 genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
102 genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
103 genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
104 genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
105
106 def subClassOfBuilder():
107 for subclass in SubClassOfDict:
108 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
109 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
110 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
111 genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))
112
113 def save():
114 #Create the subclass off instances
115 #subClassOfBuilder()
116 ## Saves the file
117 data = genomeGraph.serialize(format='turtle')
118 open(sys.argv[2],"wb").write(data)
119
120 def main():
121 tmp()
122 sequences = query()
123 aragorn(sequences)
124 save()
125
126 main()