Mercurial > repos > jjkoehorst > sapp
view genetic_elements/aragorn/aragorn.py @ 17:2561c51e6605
aragorn addition
author | jjkoehorst <jasperkoehorst@gmail.com> |
---|---|
date | Sat, 21 Feb 2015 17:20:05 +0100 |
parents | |
children | 9610ddbca991 |
line wrap: on
line source
def delete_galaxy(): import sys for index, path in enumerate(sys.path): if "galaxy-dist/" in path: sys.path[index] = '' #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now. delete_galaxy() from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin # Import RDFLib's default Graph implementation. from rdflib.graph import Graph import sys, os import rdflib import subprocess import hashlib global URI global SubClassOfDict SubClassOfDict = {} URI = "http://csb.wur.nl/genome/" global seeAlso seeAlso = "rdfs:seeAlso" global coreURI coreURI = Namespace(URI) def createClass(uri): #genomeGraph.add((uri,RDF.type,OWL.Class)) #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing)) #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"])) return uri def tmp(): import time global tmpFolder tmpFolder = "/tmp/"+str(time.time())+"/" os.mkdir(tmpFolder) def query(): global genomeGraph genomeGraph = Graph() filename = sys.argv[1] genomeGraph.parse(filename, format="turtle") qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}') sequences = [] for row in qres: print ("Header:",row[0]) sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","") return sequences def aragorn(sequences): for sequence in sequences: #Call aragorn for each contig, for ease of parsing open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence)) folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/" cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output" print (cmd) os.system(cmd) aragorn = open(tmpFolder+"aragorn.output").readlines() # string = ''.join(aragorn) contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","") dnaobjectURI = coreURI[contig] #print (contig) for line in aragorn: if ">" in line: print (line.split()) try: trna, pos = line.split()[1:] except: try: trna, pos = line.split() except: if "(Permuted)" in line: trna, permute, pos = line.split()[1:] if "tRNA-" in line: trna, codon = (trna.strip(">)").split("(",1)) else: trna = trna.strip(">").strip() #Actually a tmRNA... codon = '' trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna SubClassOfDict[trna.split("-")[0].title()] = 1 if "c" in pos[0]: #complementary stop, start = pos.split("[")[1].split("]")[0].split(",") else: start, stop = pos.split("[")[1].split("]")[0].split(",") trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop] genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI)) genomeGraph.add((trnaURI, RDF.type,trnaClass)) genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer))) genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer))) genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna))) genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon))) genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn"))) genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36"))) genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1]))) def subClassOfBuilder(): for subclass in SubClassOfDict: genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class)) def save(): #Create the subclass off instances #subClassOfBuilder() ## Saves the file data = genomeGraph.serialize(format='turtle') open(sys.argv[2],"wb").write(data) def main(): tmp() sequences = query() aragorn(sequences) save() main()