Mercurial > repos > jjkoehorst > sapp
diff genetic_elements/aragorn.py @ 16:74b8ba5e2d5b
aragorn addition
author | jjkoehorst <jasperkoehorst@gmail.com> |
---|---|
date | Sat, 21 Feb 2015 17:17:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genetic_elements/aragorn.py Sat Feb 21 17:17:06 2015 +0100 @@ -0,0 +1,125 @@ +def delete_galaxy(): + import sys + for index, path in enumerate(sys.path): + if "galaxy-dist/" in path: + sys.path[index] = '' + +#Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now. +delete_galaxy() + +from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin + +# Import RDFLib's default Graph implementation. +from rdflib.graph import Graph + +import sys, os + +import rdflib +import subprocess +import hashlib +global URI +global SubClassOfDict +SubClassOfDict = {} + +URI = "http://csb.wur.nl/genome/" +global seeAlso +seeAlso = "rdfs:seeAlso" +global coreURI +coreURI = Namespace(URI) + +def createClass(uri): + #genomeGraph.add((uri,RDF.type,OWL.Class)) + #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing)) + #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) + #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) + #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"])) + return uri + +def tmp(): + import time + global tmpFolder + tmpFolder = "/tmp/"+str(time.time())+"/" + os.mkdir(tmpFolder) + +def query(): + global genomeGraph + genomeGraph = Graph() + filename = sys.argv[1] + genomeGraph.parse(filename, format="turtle") + qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}') + sequences = [] + for row in qres: + print ("Header:",row[0]) + sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","") + + return sequences + +def aragorn(sequences): + for sequence in sequences: + #Call aragorn for each contig, for ease of parsing + open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence)) + folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/" + cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output" + print (cmd) + os.system(cmd) + aragorn = open(tmpFolder+"aragorn.output").readlines() +# string = ''.join(aragorn) + + contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","") + dnaobjectURI = coreURI[contig] + #print (contig) + for line in aragorn: + if ">" in line: + print (line.split()) + try: + trna, pos = line.split()[1:] + except: + try: + trna, pos = line.split() + except: + if "(Permuted)" in line: + trna, permute, pos = line.split()[1:] + + if "tRNA-" in line: + trna, codon = (trna.strip(">)").split("(",1)) + else: + trna = trna.strip(">").strip() #Actually a tmRNA... + codon = '' + trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna + SubClassOfDict[trna.split("-")[0].title()] = 1 + if "c" in pos[0]: #complementary + stop, start = pos.split("[")[1].split("]")[0].split(",") + else: + start, stop = pos.split("[")[1].split("]")[0].split(",") + trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop] + genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI)) + genomeGraph.add((trnaURI, RDF.type,trnaClass)) + genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer))) + genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer))) + genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna))) + genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon))) + genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn"))) + genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36"))) + genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1]))) + +def subClassOfBuilder(): + for subclass in SubClassOfDict: + genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) + genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) + genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) + genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class)) + +def save(): + #Create the subclass off instances + #subClassOfBuilder() + ## Saves the file + data = genomeGraph.serialize(format='turtle') + open(sys.argv[2],"wb").write(data) + +def main(): + tmp() + sequences = query() + aragorn(sequences) + save() + +main()