Mercurial > repos > tyty > structurefold
diff predict/predict_RNAs.py @ 74:63c41304b221 draft
Uploaded
author | tyty |
---|---|
date | Tue, 09 Dec 2014 03:03:30 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/predict_RNAs.py Tue Dec 09 03:03:30 2014 -0500 @@ -0,0 +1,102 @@ +#RNA structure prediction & Output and illustrate reactivities + +import sys +import shlex +import subprocess +import tarfile +from parse_dis_pac import * +from read_file import * +from Bio import SeqIO +import os +from rtts_plot import * +import random +import string + + +id_file = sys.argv[1] +seq_file = sys.argv[2] +predict_type = sys.argv[3] +temperature = sys.argv[4] +output_file = sys.argv[5] + + +flag = False +if predict_type!='silico': #input reactivity file if provided + react_file = sys.argv[6] + slope = sys.argv[7] + intercept = sys.argv[8] + react = parse_dist(react_file) + react = react[1] + flag = True + +syspath = os.getcwd() + +ids = read_t_file(id_file) +sequences = SeqIO.parse(seq_file, 'fasta') + + +seqs = {} +for seq in sequences: + seqs[seq.id] = seq.seq.tostring() + +if len(ids)>100: #setup a limit of the number of sequence to be predicted + print("Number of sequences exceeds limitation!") + sys.exit(0) + + +#predict RNA structures +output_directory = os.path.join(syspath, "output_files") +if not os.path.exists(output_directory): + os.makedirs(output_directory) +for i in range(len(ids)): + flag2 = 0 + id_s = ids[i][0] + #print(id_s) + #Put RNA sequence and reactivities into files + if id_s in seqs: + fh = file(os.path.join(syspath,"temp.txt"), 'w') + fh.write('>'+id_s) + fh.write('\n') + fh.write(seqs[id_s]) + fh.close() + if not flag: + command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) + subprocess.call(command) + else: + if id_s in react: + fh = file(os.path.join(syspath, "constraint.txt"), 'w') + make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA + for j in range(0, (len(react[id_s]))): + if react[id_s][j]!='NA': + fh.write(str(j+1)) + fh.write('\t') + fh.write(str(react[id_s][j])) + fh.write('\n') + #h.write(str(react[id_s][j])) #Output the reactivities + #h.write('\t') + fh.close() + #h.write('\n') + #h.write('\n') + command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), + os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, + os.path.join(output_directory, "%s.ct" % id_s))) + subprocess.call(command) + else: + print(id_s+" not in the data of react!") + flag2 = 1 + if flag2 == 0: + command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) + subprocess.call(command) + else: + print(id_s+" not in the data of sequences!") + +#Remove the unnecessary files +tarball = tarfile.open(output_file, 'w:') +for filename in os.listdir(output_directory): + filepath = os.path.join(output_directory, filename) + print filepath + tarball.add(filepath, arcname=filename) +#print os.listdir(syspath) +#print os.listdir(output_directory) +# tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) +tarball.close()