def delete_galaxy():
    """Blank out every ``sys.path`` entry that contains ``galaxy-dist/``.

    Matching entries are overwritten in place with the empty string; the
    list keeps its length and the order of the remaining entries.
    """
    # Imported locally because this function is called before the
    # module-level imports are executed.
    import sys

    search_path = sys.path
    for position in range(len(search_path)):
        if "galaxy-dist/" not in search_path[position]:
            continue
        search_path[position] = ''
# Some modules required by RDFLib also exist inside Galaxy, which breaks the
# rdflib import below. Blanking the Galaxy entries on sys.path before that
# import is not an elegant solution, but it works for now.
delete_galaxy()
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
12 # Import RDFLib's default Graph implementation.
13 from rdflib.graph import Graph
15 import sys, os
17 import rdflib
18 import subprocess
19 import hashlib
# Title-cased feature class names recorded by aragorn() and iterated by
# subClassOfBuilder().
SubClassOfDict = {}

# Base IRI of the genome namespace; coreURI[...] builds terms underneath it.
URI = "http://csb.wur.nl/genome/"
coreURI = Namespace(URI)

# Prefixed name of rdfs:seeAlso, kept as a plain string.
seeAlso = "rdfs:seeAlso"
def createClass(uri):
    """Return *uri* unchanged.

    No triples are added to the graph here; the class-hierarchy statements
    are written by subClassOfBuilder() instead.
    """
    return uri
def tmp():
    """Create a fresh private scratch directory and store it in ``tmpFolder``.

    The global ``tmpFolder`` is set to the directory path with a trailing
    "/" so that callers can append file names by plain concatenation.

    The directory is created with ``tempfile.mkdtemp`` rather than a
    time-stamped name under ``/tmp``: the name is unpredictable, creation is
    atomic (no failure when two runs start at the same instant) and the
    platform's temp location is honoured.
    """
    # Local import: tempfile is not among the module-level imports.
    import tempfile

    global tmpFolder
    tmpFolder = tempfile.mkdtemp(prefix="aragorn_") + "/"
def query():
    """Load the input graph and collect every DNA object's sequence.

    Parses the Turtle file named by ``sys.argv[1]`` into a new global
    ``genomeGraph``, then selects each subject typed ``ssb:DnaObject``
    together with its ``ssb:sequence`` value.

    Returns:
        A list of ``[">" + subject, sequence]`` pairs (both strings), the
        sequence having surrounding whitespace stripped.

    NOTE(review): the query uses the ``ssb:`` prefix without declaring it,
    so it presumably relies on a prefix binding from the parsed file.
    """
    global genomeGraph
    genomeGraph = Graph()
    genomeGraph.parse(sys.argv[1], format="turtle")
    matches = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')

    records = []
    for match in matches:
        print("Header:", match[0])
        header = ">" + str(match[0])
        residues = str(match[1].strip())
        records.append([header, residues])
    return records
def _run_aragorn(sequence):
    """Write *sequence* as a FASTA record, run aragorn on it, return its output lines."""
    fasta_path = tmpFolder + "tmp.seq"
    output_path = tmpFolder + "aragorn.output"

    with open(fasta_path, "w") as handle:
        handle.write('\n'.join(sequence))

    # The executable is located relative to this file: two path components up,
    # then tools/aragorn1.2.36/aragorn.
    folder = os.path.realpath(__file__).rsplit("/", 2)[0] + "/"
    # sys.argv[3:-2] is forwarded verbatim as aragorn options. Passing an
    # argument list (no shell) keeps shell metacharacters in those options
    # from being interpreted.
    command = [folder + "/tools/aragorn1.2.36/aragorn", "-fasta", fasta_path] + sys.argv[3:-2]
    print(' '.join(command) + " > " + output_path)

    with open(output_path, "w") as handle:
        # The exit status is ignored; a failed run simply yields no features.
        subprocess.call(command, stdout=handle)
    with open(output_path) as handle:
        return handle.readlines()


def _parse_aragorn_header(line):
    """Split one ">" line of aragorn output into (trna, codon, start, stop).

    Recognised layouts, by whitespace-separated field count:
      * 3 fields: the first field is discarded, then name and position;
      * 2 fields: name and position;
      * 4 fields containing "(Permuted)": first field discarded, then
        name, the "(Permuted)" marker and position.
    Returns None for any other layout. All returned values are strings.
    """
    fields = line.split()
    if len(fields) == 3:
        trna, pos = fields[1:]
    elif len(fields) == 2:
        trna, pos = fields
    elif len(fields) == 4 and "(Permuted)" in line:
        trna, _, pos = fields[1:]
    else:
        return None

    if "tRNA-" in line:
        # "tRNA-Xxx(yyy)" -> name "tRNA-Xxx", anticodon "yyy".
        trna, codon = trna.strip(">)").split("(", 1)
    else:
        trna = trna.strip(">").strip()  # Actually a tmRNA...
        codon = ''

    first, second = pos.split("[")[1].split("]")[0].split(",")
    if "c" in pos[0]:
        # Complementary strand: the two coordinates are swapped.
        start, stop = second, first
    else:
        start, stop = first, second
    return trna, codon, start, stop


def aragorn(sequences):
    """Run aragorn on every contig and add the reported RNAs to ``genomeGraph``.

    Args:
        sequences: iterable of ``[">" + contig URI, residues]`` pairs, as
            returned by query().

    For each pair the record is written to ``tmpFolder``, aragorn is run on
    it (one contig at a time, for ease of parsing), and every output line
    containing ">" is parsed. Each parsed hit becomes a feature of the
    contig with type, begin, end, trna_type, trna_anti, tool, version and
    sourcedb statements; the title-cased class name is recorded in
    ``SubClassOfDict``.

    Lines containing ">" whose layout is not recognised are skipped instead
    of raising NameError or reusing the values of the previous hit.

    Raises:
        OSError: if the aragorn executable cannot be started.
        ValueError: if a hit is found and ``-sourcedb`` is not in sys.argv,
            or a recognised header has a malformed name or position.
    """
    for sequence in sequences:
        output_lines = _run_aragorn(sequence)

        contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/", "")
        dnaobjectURI = coreURI[contig]

        for line in output_lines:
            if ">" not in line:
                continue
            print(line.split())
            parsed = _parse_aragorn_header(line)
            if parsed is None:
                continue
            trna, codon, start, stop = parsed

            class_name = trna.split("-")[0].title()  # trna or tmrna
            trnaClass = createClass(coreURI[class_name])
            SubClassOfDict[class_name] = 1

            trnaURI = coreURI[contig + "/trna-aragorn_1_2_36-" + trna.lower() + "/" + start + "_" + stop]
            genomeGraph.add((dnaobjectURI, coreURI["feature"], trnaURI))
            genomeGraph.add((trnaURI, RDF.type, trnaClass))
            genomeGraph.add((trnaURI, coreURI["begin"], Literal(start, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["end"], Literal(stop, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["trna_type"], Literal(trna)))
            genomeGraph.add((trnaURI, coreURI["trna_anti"], Literal(codon)))
            genomeGraph.add((trnaURI, coreURI["tool"], Literal("aragorn")))
            genomeGraph.add((trnaURI, coreURI["version"], Literal("1.2.36")))
            genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb") + 1])))
def subClassOfBuilder():
    """Add the RNA class-hierarchy statements to ``genomeGraph``.

    For every name in ``SubClassOfDict`` the graph receives:
    Feature subClassOf owl:Thing, Rna subClassOf Feature,
    <name> subClassOf Rna, and Rna a owl:Class.
    Nothing is added when ``SubClassOfDict`` is empty.
    """
    for name in SubClassOfDict:
        feature = coreURI["Feature"]
        rna = coreURI["Rna"]
        hierarchy = (
            (feature, RDFS.subClassOf, OWL.Thing),
            (rna, RDFS.subClassOf, feature),
            (coreURI[name], RDFS.subClassOf, rna),
            (rna, RDF.type, OWL.Class),
        )
        for triple in hierarchy:
            genomeGraph.add(triple)
def save():
    """Serialize ``genomeGraph`` as Turtle into the file named by ``sys.argv[2]``.

    The class-hierarchy step (subClassOfBuilder) is intentionally not run
    here; it was disabled in the original code.
    """
    data = genomeGraph.serialize(format='turtle')
    # Depending on the rdflib version, serialize() returns bytes or text;
    # the file is opened in binary mode, so text is encoded first.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)
def main():
    """Run the whole pipeline: scratch dir, load sequences, annotate, save."""
    # tmp() must come first: aragorn() writes its files into tmpFolder.
    tmp()
    # query() also creates the global graph that aragorn() and save() use.
    aragorn(query())
    save()
# Script entry point: the pipeline runs as soon as this file is executed.
main()