| 117 | 1 #RNA structure prediction & Output and illustrate reactivities | 
|  | 2 | 
|  | 3 import sys | 
|  | 4 import shlex | 
|  | 5 import subprocess | 
|  | 6 import tarfile | 
|  | 7 from parse_dis_pac import * | 
|  | 8 from read_file import * | 
|  | 9 from Bio import SeqIO | 
|  | 10 import os | 
|  | 11 from rtts_plot import * | 
|  | 12 import random | 
|  | 13 import string | 
|  | 14 | 
|  | 15 | 
|  | 16 id_file = sys.argv[1] | 
|  | 17 seq_file = sys.argv[2] | 
|  | 18 predict_type = sys.argv[3] | 
|  | 19 temperature = sys.argv[4] | 
|  | 20 predict_program = sys.argv[5] | 
|  | 21 output_html = sys.argv[6] | 
|  | 22 output_directory = sys.argv[7] | 
|  | 23 | 
|  | 24 | 
|  | 25 | 
|  | 26 flag = False | 
|  | 27 if predict_type!='silico': #input reactivity file if provided | 
|  | 28     if predict_program == 'rs': | 
|  | 29         react_file = sys.argv[8] | 
|  | 30         slope = sys.argv[9] | 
|  | 31         intercept = sys.argv[10] | 
|  | 32     else: | 
|  | 33         react_file = sys.argv[8] | 
|  | 34         thres_h = sys.argv[9] | 
|  | 35         thres_h = float(thres_h) | 
|  | 36         thres_l = sys.argv[10] | 
|  | 37         thres_l = float(thres_l) | 
|  | 38         gqs = sys.argv[11] | 
|  | 39         gqs = int(gqs) | 
|  | 40 | 
|  | 41     react = parse_dist(react_file) | 
|  | 42     react = react[1] | 
|  | 43     flag = True | 
|  | 44 else: | 
|  | 45     if predict_program!='rs': | 
|  | 46         gqs = sys.argv[8] | 
|  | 47         gqs = int(gqs) | 
|  | 48 | 
|  | 49 | 
|  | 50 ospath = os.path.realpath(sys.argv[0]) | 
|  | 51 ost = ospath.split('/') | 
|  | 52 syspathpt = "" | 
|  | 53 for i in range(len(ost)-1): | 
|  | 54     syspathpt = syspathpt+ost[i].strip() | 
|  | 55     syspathpt = syspathpt+'/' | 
|  | 56 | 
|  | 57 | 
|  | 58 syspath = os.getcwd() | 
|  | 59 | 
|  | 60 ids = read_t_file(id_file) | 
|  | 61 sequences = SeqIO.parse(seq_file, 'fasta') | 
|  | 62 | 
|  | 63 | 
|  | 64 seqs = {} | 
|  | 65 for seq in sequences: | 
|  | 66     seqs[seq.id] = seq.seq.tostring() | 
|  | 67 | 
|  | 68 if len(ids)>100: #setup a limit of the number of sequence to be predicted | 
|  | 69     print("Number of sequences exceeds limitation!") | 
|  | 70     sys.exit(0) | 
|  | 71 | 
|  | 72 | 
|  | 73 #predict RNA structures | 
|  | 74 | 
|  | 75 os.mkdir(output_directory) | 
|  | 76 flag3 = 0 | 
|  | 77 | 
|  | 78 id_predicted = set() | 
|  | 79 for i in range(len(ids)): | 
|  | 80     flag2 = 0 | 
|  | 81     id_s = ids[i][0] | 
|  | 82     #print(id_s) | 
|  | 83     #Put RNA sequence and reactivities into files | 
|  | 84     if id_s in seqs: | 
|  | 85         fh = file(os.path.join(syspath,"temp.txt"), 'w') | 
|  | 86         fh.write('>'+id_s) | 
|  | 87         fh.write('\n') | 
|  | 88         fh.write(seqs[id_s]) | 
|  | 89         fh.close() | 
|  | 90         if not flag: | 
|  | 91             if predict_program == 'rs': | 
|  | 92                 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | 
|  | 93                 subprocess.call(command) | 
|  | 94                 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | 
|  | 95                 subprocess.call(command) | 
|  | 96             else: | 
|  | 97                 if gqs: | 
|  | 98                     os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | 
|  | 99 | 
|  | 100                 else: | 
|  | 101                     os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | 
|  | 102                 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | 
|  | 103                 subprocess.call(command) | 
|  | 104                 if not gqs: | 
|  | 105                     command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | 
|  | 106                 else: | 
|  | 107                     command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | 
|  | 108                 subprocess.call(command) | 
|  | 109                 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | 
|  | 110                 subprocess.call(command) | 
|  | 111         else: | 
|  | 112             if id_s in react: | 
|  | 113                 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | 
|  | 114                 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | 
|  | 115                 if predict_program == 'rs': | 
|  | 116                     for j in range(0, (len(react[id_s]))): | 
|  | 117                         if react[id_s][j]!='NA': | 
|  | 118                             fh.write(str(j+1)) | 
|  | 119                             fh.write('\t') | 
|  | 120                             fh.write(str(react[id_s][j])) | 
|  | 121                             fh.write('\n') | 
|  | 122                     fh.close() | 
|  | 123                     command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | 
|  | 124                                                                  os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | 
|  | 125                                                                  os.path.join(output_directory, "%s.ct" % id_s))) | 
|  | 126                     subprocess.call(command) | 
|  | 127                     command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | 
|  | 128                     subprocess.call(command) | 
|  | 129                 else: | 
|  | 130                     fh.write('>'+id_s) | 
|  | 131                     fh.write('\n') | 
|  | 132                     fh.write(seqs[id_s]) | 
|  | 133                     fh.write('\n') | 
|  | 134                     for j in range(0, (len(react[id_s]))): | 
|  | 135                         if react[id_s][j]!='NA': | 
|  | 136                             re = float(react[id_s][j]) | 
|  | 137                             if re>thres_h: | 
|  | 138                                 fh.write('x') | 
|  | 139                             else: | 
|  | 140                                 if re<thres_l: | 
|  | 141                                     fh.write('|') | 
|  | 142                                 else: | 
|  | 143                                     fh.write('.') | 
|  | 144                         else: | 
|  | 145                             fh.write('.') | 
|  | 146                     fh.write('.') | 
|  | 147                     fh.close() | 
|  | 148                     if gqs: | 
|  | 149                         os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | 
|  | 150 | 
|  | 151                     else: | 
|  | 152                         os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | 
|  | 153                     command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | 
|  | 154                     subprocess.call(command) | 
|  | 155                     if not gqs: | 
|  | 156                         command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | 
|  | 157                     else: | 
|  | 158                         command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | 
|  | 159                     subprocess.call(command) | 
|  | 160                     command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | 
|  | 161                     subprocess.call(command) | 
|  | 162 | 
|  | 163             else: | 
|  | 164                 print(id_s+" not in the data of react!") | 
|  | 165                 flag2 = 1 | 
|  | 166         if flag2 == 0: | 
|  | 167             if predict_program == 'rs': | 
|  | 168                 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | 
|  | 169                 subprocess.call(command) | 
|  | 170                 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | 
|  | 171                 subprocess.call(command) | 
|  | 172             else: | 
|  | 173                 if not gqs: | 
|  | 174                     command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | 
|  | 175                     subprocess.call(command) | 
|  | 176                     command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | 
|  | 177                     subprocess.call(command) | 
|  | 178             flag3 = 1 | 
|  | 179         id_predicted.add(id_s) | 
|  | 180     else: | 
|  | 181         print(id_s+" not in the data of sequences!") | 
|  | 182 | 
|  | 183 #Remove the unnecessary files | 
|  | 184 if flag3 == 1: | 
|  | 185 | 
|  | 186     tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:') | 
|  | 187     for filename in os.listdir(output_directory): | 
|  | 188         filepath = os.path.join(output_directory, filename) | 
|  | 189         print filepath | 
|  | 190         tarball.add(filepath, arcname=filename) | 
|  | 191     #print os.listdir(syspath) | 
|  | 192     #print os.listdir(output_directory) | 
|  | 193     # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | 
|  | 194     tarball.close() | 
|  | 195 | 
|  | 196     h = open(output_html, 'wb' ) | 
|  | 197     h.write('<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n') | 
|  | 198 | 
|  | 199     h.write('<p>\n') | 
|  | 200     h.write('Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % (('prediction_results.tar'))) | 
|  | 201     #h.write('<\p>\n') | 
|  | 202     h.write('<hr>\n') | 
|  | 203 | 
|  | 204 | 
|  | 205     for id_p in id_predicted: | 
|  | 206         h.write('<h4>'+id_p+'</h4><p><ul>\n') | 
|  | 207         h.write('<li><a href="%s">%s</a></li>\n' % (('%s.dbn' % id_p), ('%s.dbn' % id_p))) | 
|  | 208         h.write('<li><a href="%s">%s</a></li>\n' % (('%s.ps' % id_p), ('%s.ps' % id_p))) | 
|  | 209         if flag: | 
|  | 210             h.write('<li><a href="%s">%s</a></li>\n' % (('%s.tif' % id_p), ('%s.tif' % id_p))) | 
|  | 211         h.write( '</ul></p>\n' ) | 
|  | 212         h.write('<hr>\n') | 
|  | 213     h.write( '</body></html>\n' ) | 
|  | 214     h.close() | 
|  | 215 | 
|  | 216 | 
|  | 217 | 
|  | 218 | 
|  | 219 |