Mercurial > repos > jjkoehorst > sapp
comparison genetic_elements/aragorn/aragorn.py @ 27:875035bbe366
no message
author | Jasper Koehorst <jasperkoehorst@gmail.com> |
---|---|
date | Wed, 25 Feb 2015 08:16:43 +0100 |
parents | 9610ddbca991 |
children |
comparison
equal
deleted
inserted
replaced
26:6a858e304888 | 27:875035bbe366 |
---|---|
1 | |
2 def delete_galaxy(): | |
3 import sys | |
4 for index, path in enumerate(sys.path): | |
5 if "galaxy-dist/" in path: | |
6 sys.path[index] = '' | |
7 | |
8 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now. | |
9 delete_galaxy() | |
10 | |
11 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin | |
12 | |
13 # Import RDFLib's default Graph implementation. | |
14 from rdflib.graph import Graph | |
15 | |
16 import sys, os | |
17 | |
18 import rdflib | |
19 import subprocess | |
20 import hashlib | |
21 global URI | |
22 global SubClassOfDict | |
23 SubClassOfDict = {} | |
24 | |
25 URI = "http://csb.wur.nl/genome/" | |
26 global seeAlso | |
27 seeAlso = "rdfs:seeAlso" | |
28 global coreURI | |
29 coreURI = Namespace(URI) | |
30 | |
31 def createClass(uri): | |
32 #genomeGraph.add((uri,RDF.type,OWL.Class)) | |
33 #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing)) | |
34 #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) | |
35 #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) | |
36 #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"])) | |
37 return uri | |
38 | |
39 def tmp(): | |
40 import time | |
41 global tmpFolder | |
42 tmpFolder = "/tmp/"+str(time.time())+"/" | |
43 os.mkdir(tmpFolder) | |
44 | |
45 def query(): | |
46 global genomeGraph | |
47 genomeGraph = Graph() | |
48 filename = sys.argv[1] | |
49 genomeGraph.parse(filename, format="turtle") | |
50 qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}') | |
51 sequences = [] | |
52 for row in qres: | |
53 print ("Header:",row[0]) | |
54 sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","") | |
55 | |
56 return sequences | |
57 | |
58 def aragorn(sequences): | |
59 for sequence in sequences: | |
60 #Call aragorn for each contig, for ease of parsing | |
61 open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence)) | |
62 folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/" | |
63 cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output" | |
64 print (cmd) | |
65 os.system(cmd) | |
66 aragorn = open(tmpFolder+"aragorn.output").readlines() | |
67 # string = ''.join(aragorn) | |
68 | |
69 contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","") | |
70 dnaobjectURI = coreURI[contig] | |
71 #print (contig) | |
72 for line in aragorn: | |
73 if ">" in line: | |
74 print (line.split()) | |
75 try: | |
76 trna, pos = line.split()[1:] | |
77 except: | |
78 try: | |
79 trna, pos = line.split() | |
80 except: | |
81 if "(Permuted)" in line: | |
82 trna, permute, pos = line.split()[1:] | |
83 | |
84 if "tRNA-" in line: | |
85 trna, codon = (trna.strip(">)").split("(",1)) | |
86 else: | |
87 trna = trna.strip(">").strip() #Actually a tmRNA... | |
88 codon = '' | |
89 trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna | |
90 SubClassOfDict[trna.split("-")[0].title()] = 1 | |
91 if "c" in pos[0]: #complementary | |
92 stop, start = pos.split("[")[1].split("]")[0].split(",") | |
93 else: | |
94 start, stop = pos.split("[")[1].split("]")[0].split(",") | |
95 trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop] | |
96 genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI)) | |
97 genomeGraph.add((trnaURI, RDF.type,trnaClass)) | |
98 genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer))) | |
99 genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer))) | |
100 genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna))) | |
101 genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon))) | |
102 genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn"))) | |
103 genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36"))) | |
104 genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1]))) | |
105 | |
106 def subClassOfBuilder(): | |
107 for subclass in SubClassOfDict: | |
108 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) | |
109 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) | |
110 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) | |
111 genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class)) | |
112 | |
113 def save(): | |
114 #Create the subclass off instances | |
115 #subClassOfBuilder() | |
116 ## Saves the file | |
117 data = genomeGraph.serialize(format='turtle') | |
118 open(sys.argv[2],"wb").write(data) | |
119 | |
120 def main(): | |
121 tmp() | |
122 sequences = query() | |
123 aragorn(sequences) | |
124 save() | |
125 | |
126 main() |