view genetic_elements/aragorn/aragorn.py @ 17:2561c51e6605

aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 17:20:05 +0100
parents
children 9610ddbca991
line wrap: on
line source

def delete_galaxy():
	import sys
	for index, path in enumerate(sys.path):
		if "galaxy-dist/" in path:
			sys.path[index] = ''

#Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
delete_galaxy()

from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin

# Import RDFLib's default Graph implementation.
from rdflib.graph import Graph

import sys, os

import rdflib
import subprocess
import hashlib
global URI
global SubClassOfDict
SubClassOfDict = {}

URI = "http://csb.wur.nl/genome/"
global seeAlso
seeAlso = "rdfs:seeAlso"
global coreURI
coreURI = Namespace(URI)

def createClass(uri):
	#genomeGraph.add((uri,RDF.type,OWL.Class))
	#genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
	#genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
	#genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
	#genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
	return uri

def tmp():
	import time
	global tmpFolder
	tmpFolder = "/tmp/"+str(time.time())+"/"
	os.mkdir(tmpFolder)

def query():
	global genomeGraph
	genomeGraph = Graph()
	filename = sys.argv[1]
	genomeGraph.parse(filename, format="turtle")
	qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
	sequences = []
	for row in qres:
		print ("Header:",row[0])
		sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","")

	return sequences

def aragorn(sequences):
	for sequence in sequences:
		#Call aragorn for each contig, for ease of parsing
		open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))
		folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"
		cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"
		print (cmd)
		os.system(cmd)
		aragorn = open(tmpFolder+"aragorn.output").readlines()
# 		string = ''.join(aragorn)

		contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
		dnaobjectURI = coreURI[contig]
		#print (contig)
		for line in aragorn:
			if ">" in line:
				print (line.split())
				try:
					trna, pos = line.split()[1:]
				except:
					try:
						trna, pos = line.split()
					except:
						if "(Permuted)" in line:
							trna, permute, pos = line.split()[1:]
							
				if "tRNA-" in line:
					trna, codon = (trna.strip(">)").split("(",1))
				else:
					trna = trna.strip(">").strip() #Actually a tmRNA...
					codon = ''
				trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
				SubClassOfDict[trna.split("-")[0].title()] = 1
				if "c" in pos[0]: #complementary
					stop, start = pos.split("[")[1].split("]")[0].split(",")
				else:
					start, stop = pos.split("[")[1].split("]")[0].split(",")
				trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
				genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
				genomeGraph.add((trnaURI, RDF.type,trnaClass))
				genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
				genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
				genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
				genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
				genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
				genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
				genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))

def subClassOfBuilder():
	for subclass in SubClassOfDict:
		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
		genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
		genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))

def save():
	#Create the subclass off instances
	#subClassOfBuilder()
	## Saves the file
	data = genomeGraph.serialize(format='turtle')
	open(sys.argv[2],"wb").write(data)

def main():
	tmp()
	sequences = query()
	aragorn(sequences)
	save()

main()