17
+ − 1 def delete_galaxy():
+ − 2 import sys
+ − 3 for index, path in enumerate(sys.path):
+ − 4 if "galaxy-dist/" in path:
+ − 5 sys.path[index] = ''
+ − 6
+ − 7 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
+ − 8 delete_galaxy()
+ − 9
+ − 10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
+ − 11
+ − 12 # Import RDFLib's default Graph implementation.
+ − 13 from rdflib.graph import Graph
+ − 14
+ − 15 import sys, os
+ − 16
+ − 17 import rdflib
+ − 18 import subprocess
+ − 19 import hashlib
+ − 20 global URI
+ − 21 global SubClassOfDict
+ − 22 SubClassOfDict = {}
+ − 23
+ − 24 URI = "http://csb.wur.nl/genome/"
+ − 25 global seeAlso
+ − 26 seeAlso = "rdfs:seeAlso"
+ − 27 global coreURI
+ − 28 coreURI = Namespace(URI)
+ − 29
+ − 30 def createClass(uri):
+ − 31 #genomeGraph.add((uri,RDF.type,OWL.Class))
+ − 32 #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
+ − 33 #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
+ − 34 #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
+ − 35 #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
+ − 36 return uri
+ − 37
+ − 38 def tmp():
+ − 39 import time
+ − 40 global tmpFolder
+ − 41 tmpFolder = "/tmp/"+str(time.time())+"/"
+ − 42 os.mkdir(tmpFolder)
+ − 43
+ − 44 def query():
+ − 45 global genomeGraph
+ − 46 genomeGraph = Graph()
+ − 47 filename = sys.argv[1]
+ − 48 genomeGraph.parse(filename, format="turtle")
+ − 49 qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')
+ − 50 sequences = []
+ − 51 for row in qres:
+ − 52 print ("Header:",row[0])
+ − 53 sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","")
+ − 54
+ − 55 return sequences
+ − 56
+ − 57 def aragorn(sequences):
+ − 58 for sequence in sequences:
+ − 59 #Call aragorn for each contig, for ease of parsing
+ − 60 open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))
+ − 61 folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"
+ − 62 cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"
+ − 63 print (cmd)
+ − 64 os.system(cmd)
+ − 65 aragorn = open(tmpFolder+"aragorn.output").readlines()
+ − 66 # string = ''.join(aragorn)
+ − 67
+ − 68 contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
+ − 69 dnaobjectURI = coreURI[contig]
+ − 70 #print (contig)
+ − 71 for line in aragorn:
+ − 72 if ">" in line:
+ − 73 print (line.split())
+ − 74 try:
+ − 75 trna, pos = line.split()[1:]
+ − 76 except:
+ − 77 try:
+ − 78 trna, pos = line.split()
+ − 79 except:
+ − 80 if "(Permuted)" in line:
+ − 81 trna, permute, pos = line.split()[1:]
+ − 82
+ − 83 if "tRNA-" in line:
+ − 84 trna, codon = (trna.strip(">)").split("(",1))
+ − 85 else:
+ − 86 trna = trna.strip(">").strip() #Actually a tmRNA...
+ − 87 codon = ''
+ − 88 trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
+ − 89 SubClassOfDict[trna.split("-")[0].title()] = 1
+ − 90 if "c" in pos[0]: #complementary
+ − 91 stop, start = pos.split("[")[1].split("]")[0].split(",")
+ − 92 else:
+ − 93 start, stop = pos.split("[")[1].split("]")[0].split(",")
+ − 94 trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
+ − 95 genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
+ − 96 genomeGraph.add((trnaURI, RDF.type,trnaClass))
+ − 97 genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
+ − 98 genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
+ − 99 genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
+ − 100 genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))
+ − 101 genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
+ − 102 genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
+ − 103 genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
+ − 104
+ − 105 def subClassOfBuilder():
+ − 106 for subclass in SubClassOfDict:
+ − 107 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
+ − 108 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
+ − 109 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
+ − 110 genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))
+ − 111
+ − 112 def save():
+ − 113 #Create the subclass off instances
+ − 114 #subClassOfBuilder()
+ − 115 ## Saves the file
+ − 116 data = genomeGraph.serialize(format='turtle')
+ − 117 open(sys.argv[2],"wb").write(data)
+ − 118
+ − 119 def main():
+ − 120 tmp()
+ − 121 sequences = query()
+ − 122 aragorn(sequences)
+ − 123 save()
+ − 124
+ − 125 main()