diff genetic_elements/aragorn.py @ 16:74b8ba5e2d5b

aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 17:17:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genetic_elements/aragorn.py	Sat Feb 21 17:17:06 2015 +0100
@@ -0,0 +1,125 @@
+def delete_galaxy():
+	import sys
+	for index, path in enumerate(sys.path):
+		if "galaxy-dist/" in path:
+			sys.path[index] = ''
+
+#Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
+delete_galaxy()
+
+from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
+
+# Import RDFLib's default Graph implementation.
+from rdflib.graph import Graph
+
+import sys, os
+
+import rdflib
+import subprocess
+import hashlib
+global URI
+global SubClassOfDict
+SubClassOfDict = {}
+
+URI = "http://csb.wur.nl/genome/"
+global seeAlso
+seeAlso = "rdfs:seeAlso"
+global coreURI
+coreURI = Namespace(URI)
+
+def createClass(uri):
+	#genomeGraph.add((uri,RDF.type,OWL.Class))
+	#genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
+	#genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
+	#genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
+	#genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
+	return uri
+
+def tmp():
+	import time
+	global tmpFolder
+	tmpFolder = "/tmp/"+str(time.time())+"/"
+	os.mkdir(tmpFolder)
+
+def query():
+	global genomeGraph
+	genomeGraph = Graph()
+	filename = sys.argv[1]
+	genomeGraph.parse(filename, format="turtle")
+	qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
+	sequences = []
+	for row in qres:
+		print ("Header:",row[0])
+		sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","")
+
+	return sequences
+
+def aragorn(sequences):
+	for sequence in sequences:
+		#Call aragorn for each contig, for ease of parsing
+		open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))
+		folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"
+		cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"
+		print (cmd)
+		os.system(cmd)
+		aragorn = open(tmpFolder+"aragorn.output").readlines()
+# 		string = ''.join(aragorn)
+
+		contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
+		dnaobjectURI = coreURI[contig]
+		#print (contig)
+		for line in aragorn:
+			if ">" in line:
+				print (line.split())
+				try:
+					trna, pos = line.split()[1:]
+				except:
+					try:
+						trna, pos = line.split()
+					except:
+						if "(Permuted)" in line:
+							trna, permute, pos = line.split()[1:]
+							
+				if "tRNA-" in line:
+					trna, codon = (trna.strip(">)").split("(",1))
+				else:
+					trna = trna.strip(">").strip() #Actually a tmRNA...
+					codon = ''
+				trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
+				SubClassOfDict[trna.split("-")[0].title()] = 1
+				if "c" in pos[0]: #complementary
+					stop, start = pos.split("[")[1].split("]")[0].split(",")
+				else:
+					start, stop = pos.split("[")[1].split("]")[0].split(",")
+				trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
+				genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
+				genomeGraph.add((trnaURI, RDF.type,trnaClass))
+				genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
+				genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
+				genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
+				genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
+				genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
+				genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
+				genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
+
+def subClassOfBuilder():
+	for subclass in SubClassOfDict:
+		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
+		genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
+		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
+		genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))
+
+def save():
+	#Create the subclass off instances
+	#subClassOfBuilder()
+	## Saves the file
+	data = genomeGraph.serialize(format='turtle')
+	open(sys.argv[2],"wb").write(data)
+
+def main():
+	tmp()
+	sequences = query()
+	aragorn(sequences)
+	save()
+
+main()