Mercurial > repos > tyty > structurefold
comparison predict/predict_RNAs.py @ 117:75e3711e23c4 draft
Uploaded
| author | tyty |
|---|---|
| date | Tue, 14 Apr 2015 14:17:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 116:62e8f7adf1ab | 117:75e3711e23c4 |
|---|---|
| 1 #RNA structure prediction & Output and illustrate reactivities | |
| 2 | |
| 3 import sys | |
| 4 import shlex | |
| 5 import subprocess | |
| 6 import tarfile | |
| 7 from parse_dis_pac import * | |
| 8 from read_file import * | |
| 9 from Bio import SeqIO | |
| 10 import os | |
| 11 from rtts_plot import * | |
| 12 import random | |
| 13 import string | |
| 14 | |
| 15 | |
| 16 id_file = sys.argv[1] | |
| 17 seq_file = sys.argv[2] | |
| 18 predict_type = sys.argv[3] | |
| 19 temperature = sys.argv[4] | |
| 20 predict_program = sys.argv[5] | |
| 21 output_html = sys.argv[6] | |
| 22 output_directory = sys.argv[7] | |
| 23 | |
| 24 | |
| 25 | |
| 26 flag = False | |
| 27 if predict_type!='silico': #input reactivity file if provided | |
| 28 if predict_program == 'rs': | |
| 29 react_file = sys.argv[8] | |
| 30 slope = sys.argv[9] | |
| 31 intercept = sys.argv[10] | |
| 32 else: | |
| 33 react_file = sys.argv[8] | |
| 34 thres_h = sys.argv[9] | |
| 35 thres_h = float(thres_h) | |
| 36 thres_l = sys.argv[10] | |
| 37 thres_l = float(thres_l) | |
| 38 gqs = sys.argv[11] | |
| 39 gqs = int(gqs) | |
| 40 | |
| 41 react = parse_dist(react_file) | |
| 42 react = react[1] | |
| 43 flag = True | |
| 44 else: | |
| 45 if predict_program!='rs': | |
| 46 gqs = sys.argv[8] | |
| 47 gqs = int(gqs) | |
| 48 | |
| 49 | |
| 50 ospath = os.path.realpath(sys.argv[0]) | |
| 51 ost = ospath.split('/') | |
| 52 syspathpt = "" | |
| 53 for i in range(len(ost)-1): | |
| 54 syspathpt = syspathpt+ost[i].strip() | |
| 55 syspathpt = syspathpt+'/' | |
| 56 | |
| 57 | |
| 58 syspath = os.getcwd() | |
| 59 | |
| 60 ids = read_t_file(id_file) | |
| 61 sequences = SeqIO.parse(seq_file, 'fasta') | |
| 62 | |
| 63 | |
| 64 seqs = {} | |
| 65 for seq in sequences: | |
| 66 seqs[seq.id] = seq.seq.tostring() | |
| 67 | |
| 68 if len(ids)>100: #setup a limit of the number of sequence to be predicted | |
| 69 print("Number of sequences exceeds limitation!") | |
| 70 sys.exit(0) | |
| 71 | |
| 72 | |
| 73 #predict RNA structures | |
| 74 | |
| 75 os.mkdir(output_directory) | |
| 76 flag3 = 0 | |
| 77 | |
| 78 id_predicted = set() | |
| 79 for i in range(len(ids)): | |
| 80 flag2 = 0 | |
| 81 id_s = ids[i][0] | |
| 82 #print(id_s) | |
| 83 #Put RNA sequence and reactivities into files | |
| 84 if id_s in seqs: | |
| 85 fh = file(os.path.join(syspath,"temp.txt"), 'w') | |
| 86 fh.write('>'+id_s) | |
| 87 fh.write('\n') | |
| 88 fh.write(seqs[id_s]) | |
| 89 fh.close() | |
| 90 if not flag: | |
| 91 if predict_program == 'rs': | |
| 92 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | |
| 93 subprocess.call(command) | |
| 94 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 95 subprocess.call(command) | |
| 96 else: | |
| 97 if gqs: | |
| 98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
| 99 | |
| 100 else: | |
| 101 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
| 102 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 103 subprocess.call(command) | |
| 104 if not gqs: | |
| 105 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
| 106 else: | |
| 107 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
| 108 subprocess.call(command) | |
| 109 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
| 110 subprocess.call(command) | |
| 111 else: | |
| 112 if id_s in react: | |
| 113 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | |
| 114 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | |
| 115 if predict_program == 'rs': | |
| 116 for j in range(0, (len(react[id_s]))): | |
| 117 if react[id_s][j]!='NA': | |
| 118 fh.write(str(j+1)) | |
| 119 fh.write('\t') | |
| 120 fh.write(str(react[id_s][j])) | |
| 121 fh.write('\n') | |
| 122 fh.close() | |
| 123 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | |
| 124 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | |
| 125 os.path.join(output_directory, "%s.ct" % id_s))) | |
| 126 subprocess.call(command) | |
| 127 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 128 subprocess.call(command) | |
| 129 else: | |
| 130 fh.write('>'+id_s) | |
| 131 fh.write('\n') | |
| 132 fh.write(seqs[id_s]) | |
| 133 fh.write('\n') | |
| 134 for j in range(0, (len(react[id_s]))): | |
| 135 if react[id_s][j]!='NA': | |
| 136 re = float(react[id_s][j]) | |
| 137 if re>thres_h: | |
| 138 fh.write('x') | |
| 139 else: | |
| 140 if re<thres_l: | |
| 141 fh.write('|') | |
| 142 else: | |
| 143 fh.write('.') | |
| 144 else: | |
| 145 fh.write('.') | |
| 146 fh.write('.') | |
| 147 fh.close() | |
| 148 if gqs: | |
| 149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
| 150 | |
| 151 else: | |
| 152 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
| 153 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 154 subprocess.call(command) | |
| 155 if not gqs: | |
| 156 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
| 157 else: | |
| 158 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
| 159 subprocess.call(command) | |
| 160 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
| 161 subprocess.call(command) | |
| 162 | |
| 163 else: | |
| 164 print(id_s+" not in the data of react!") | |
| 165 flag2 = 1 | |
| 166 if flag2 == 0: | |
| 167 if predict_program == 'rs': | |
| 168 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
| 169 subprocess.call(command) | |
| 170 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
| 171 subprocess.call(command) | |
| 172 else: | |
| 173 if not gqs: | |
| 174 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
| 175 subprocess.call(command) | |
| 176 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
| 177 subprocess.call(command) | |
| 178 flag3 = 1 | |
| 179 id_predicted.add(id_s) | |
| 180 else: | |
| 181 print(id_s+" not in the data of sequences!") | |
| 182 | |
| 183 #Remove the unnecessary files | |
| 184 if flag3 == 1: | |
| 185 | |
| 186 tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:') | |
| 187 for filename in os.listdir(output_directory): | |
| 188 filepath = os.path.join(output_directory, filename) | |
| 189 print filepath | |
| 190 tarball.add(filepath, arcname=filename) | |
| 191 #print os.listdir(syspath) | |
| 192 #print os.listdir(output_directory) | |
| 193 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | |
| 194 tarball.close() | |
| 195 | |
| 196 h = open(output_html, 'wb' ) | |
| 197 h.write('<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n') | |
| 198 | |
| 199 h.write('<p>\n') | |
| 200 h.write('Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % (('prediction_results.tar'))) | |
| 201 #h.write('<\p>\n') | |
| 202 h.write('<hr>\n') | |
| 203 | |
| 204 | |
| 205 for id_p in id_predicted: | |
| 206 h.write('<h4>'+id_p+'</h4><p><ul>\n') | |
| 207 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.dbn' % id_p), ('%s.dbn' % id_p))) | |
| 208 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.ps' % id_p), ('%s.ps' % id_p))) | |
| 209 if flag: | |
| 210 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.tif' % id_p), ('%s.tif' % id_p))) | |
| 211 h.write( '</ul></p>\n' ) | |
| 212 h.write('<hr>\n') | |
| 213 h.write( '</body></html>\n' ) | |
| 214 h.close() | |
| 215 | |
| 216 | |
| 217 | |
| 218 | |
| 219 |
