Mercurial > repos > jjkoehorst > sapp
comparison genetic_elements/aragorn/aragorn.py @ 17:2561c51e6605
aragorn addition
author | jjkoehorst <jasperkoehorst@gmail.com> |
---|---|
date | Sat, 21 Feb 2015 17:20:05 +0100 |
parents | |
children | 9610ddbca991 |
comparison
equal
deleted
inserted
replaced
16:74b8ba5e2d5b | 17:2561c51e6605 |
---|---|
1 def delete_galaxy(): | |
2 import sys | |
3 for index, path in enumerate(sys.path): | |
4 if "galaxy-dist/" in path: | |
5 sys.path[index] = '' | |
6 | |
7 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now. | |
8 delete_galaxy() | |
9 | |
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin | |
11 | |
12 # Import RDFLib's default Graph implementation. | |
13 from rdflib.graph import Graph | |
14 | |
15 import sys, os | |
16 | |
17 import rdflib | |
18 import subprocess | |
19 import hashlib | |
20 global URI | |
21 global SubClassOfDict | |
22 SubClassOfDict = {} | |
23 | |
24 URI = "http://csb.wur.nl/genome/" | |
25 global seeAlso | |
26 seeAlso = "rdfs:seeAlso" | |
27 global coreURI | |
28 coreURI = Namespace(URI) | |
29 | |
30 def createClass(uri): | |
31 #genomeGraph.add((uri,RDF.type,OWL.Class)) | |
32 #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing)) | |
33 #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) | |
34 #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) | |
35 #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"])) | |
36 return uri | |
37 | |
38 def tmp(): | |
39 import time | |
40 global tmpFolder | |
41 tmpFolder = "/tmp/"+str(time.time())+"/" | |
42 os.mkdir(tmpFolder) | |
43 | |
44 def query(): | |
45 global genomeGraph | |
46 genomeGraph = Graph() | |
47 filename = sys.argv[1] | |
48 genomeGraph.parse(filename, format="turtle") | |
49 qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}') | |
50 sequences = [] | |
51 for row in qres: | |
52 print ("Header:",row[0]) | |
53 sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","") | |
54 | |
55 return sequences | |
56 | |
57 def aragorn(sequences): | |
58 for sequence in sequences: | |
59 #Call aragorn for each contig, for ease of parsing | |
60 open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence)) | |
61 folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/" | |
62 cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output" | |
63 print (cmd) | |
64 os.system(cmd) | |
65 aragorn = open(tmpFolder+"aragorn.output").readlines() | |
66 # string = ''.join(aragorn) | |
67 | |
68 contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","") | |
69 dnaobjectURI = coreURI[contig] | |
70 #print (contig) | |
71 for line in aragorn: | |
72 if ">" in line: | |
73 print (line.split()) | |
74 try: | |
75 trna, pos = line.split()[1:] | |
76 except: | |
77 try: | |
78 trna, pos = line.split() | |
79 except: | |
80 if "(Permuted)" in line: | |
81 trna, permute, pos = line.split()[1:] | |
82 | |
83 if "tRNA-" in line: | |
84 trna, codon = (trna.strip(">)").split("(",1)) | |
85 else: | |
86 trna = trna.strip(">").strip() #Actually a tmRNA... | |
87 codon = '' | |
88 trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna | |
89 SubClassOfDict[trna.split("-")[0].title()] = 1 | |
90 if "c" in pos[0]: #complementary | |
91 stop, start = pos.split("[")[1].split("]")[0].split(",") | |
92 else: | |
93 start, stop = pos.split("[")[1].split("]")[0].split(",") | |
94 trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop] | |
95 genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI)) | |
96 genomeGraph.add((trnaURI, RDF.type,trnaClass)) | |
97 genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer))) | |
98 genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer))) | |
99 genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna))) | |
100 genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon))) | |
101 genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn"))) | |
102 genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36"))) | |
103 genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1]))) | |
104 | |
105 def subClassOfBuilder(): | |
106 for subclass in SubClassOfDict: | |
107 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) | |
108 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) | |
109 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) | |
110 genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class)) | |
111 | |
112 def save(): | |
113 #Create the subclass off instances | |
114 #subClassOfBuilder() | |
115 ## Saves the file | |
116 data = genomeGraph.serialize(format='turtle') | |
117 open(sys.argv[2],"wb").write(data) | |
118 | |
119 def main(): | |
120 tmp() | |
121 sequences = query() | |
122 aragorn(sequences) | |
123 save() | |
124 | |
125 main() |