Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
comparison riboseqr/metagene.py @ 0:c34c364ce75d
First commit
| author | Vimalkumar Velayudhan <vimal@biotechcoder.com> |
|---|---|
| date | Tue, 21 Jul 2015 14:48:39 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c34c364ce75d |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import os | |
| 3 import sys | |
| 4 import glob | |
| 5 import argparse | |
| 6 import logging | |
| 7 import rpy2.robjects as robjects | |
| 8 | |
| 9 import utils | |
| 10 | |
| 11 rscript = '' | |
| 12 R = robjects.r | |
| 13 | |
| 14 | |
| 15 def run_rscript(command=None): | |
| 16 """Run R command, log it, append to rscript""" | |
| 17 global rscript | |
| 18 if not command: | |
| 19 return | |
| 20 logging.debug(command) | |
| 21 rscript += '{}\n'.format(command) | |
| 22 output = R(command) | |
| 23 return output | |
| 24 | |
| 25 | |
| 26 def do_analysis( | |
| 27 rdata_load='Periodicity.rda', selected_lengths='27', | |
| 28 selected_frames='', hit_mean='10', unique_hit_mean='1', | |
| 29 ratio_check='TRUE', min5p='-20', max5p='200', min3p='-200', max3p='20', | |
| 30 cap='', plot_title='', plot_lengths='27', rdata_save='Metagene.rda', | |
| 31 html_file='Metagene-report.html', output_path=os.getcwd()): | |
| 32 """Metagene analysis from saved periodicity R data file. """ | |
| 33 run_rscript('suppressMessages(library(riboSeqR))') | |
| 34 run_rscript('load("{}")'.format(rdata_load)) | |
| 35 | |
| 36 logging.debug('fS\n{}\nfCs\n{}\n'.format(R['fS'], R['fCs'])) | |
| 37 options = {} | |
| 38 for key, value, rtype, rmode in ( | |
| 39 ('lengths', selected_lengths, 'int', 'charvector'), | |
| 40 ('frames', selected_frames, 'int', 'listvector'), | |
| 41 ('hit_mean', hit_mean, 'int', None), | |
| 42 ('unique_hit_mean', unique_hit_mean, 'int', None), | |
| 43 ('ratio_check', ratio_check, 'bool', None), | |
| 44 ('min5p', min5p, 'int', None), ('max5p', max5p, 'int', None), | |
| 45 ('min3p', min3p, 'int', None), ('max3p', max3p, 'int', None), | |
| 46 ('cap', cap, 'int', None), | |
| 47 ('plot_title', plot_title, 'str', 'charvector'), | |
| 48 ('plot_lengths', plot_lengths, 'int', 'list')): | |
| 49 options[key] = utils.process_args( | |
| 50 value, ret_type=rtype, ret_mode=rmode) | |
| 51 | |
| 52 cmd_args = """fCs, lengths={lengths}, | |
| 53 frames={frames}, hitMean={hit_mean}, | |
| 54 unqhitMean={unique_hit_mean}, fS=fS""".format(**options) | |
| 55 | |
| 56 if ratio_check == 'TRUE': | |
| 57 cmd_args += ', ratioCheck = TRUE' | |
| 58 | |
| 59 run_rscript('ffCs <- filterHits({})'.format(cmd_args)) | |
| 60 logging.debug("ffCs\n{}\n".format(R['ffCs'])) | |
| 61 | |
| 62 cds_args = ('coordinates=ffCs@CDS, riboDat=riboDat, min5p={min5p}, ' | |
| 63 'max5p={max5p}, min3p={min3p}, max3p={max3p}'.format(**options)) | |
| 64 | |
| 65 if options['cap']: | |
| 66 cds_args += ', cap={cap}'.format(**options) | |
| 67 | |
| 68 if options['plot_title']: | |
| 69 cds_args += ', main={plot_title}'.format(**options) | |
| 70 | |
| 71 html = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> | |
| 72 <html> | |
| 73 <head> | |
| 74 <title>Metagene Analysis - Report</title> | |
| 75 </head> | |
| 76 <body> | |
| 77 """ | |
| 78 html += '<h2>Metagene analysis - results</h2>\n<hr>\n' | |
| 79 html += ('<p>\nLengths of footprints used in analysis - <strong>' | |
| 80 '<code>{0}</code></strong><br>\nLengths of footprints ' | |
| 81 'selected for the plot - <strong><code>{1}</code></strong>' | |
| 82 '\n</p>\n'.format(selected_lengths, plot_lengths)) | |
| 83 for count, length in enumerate(options['plot_lengths']): | |
| 84 count += 1 | |
| 85 html += '<h3>Length: {0}</h3>\n'.format(length) | |
| 86 plot_file = os.path.join(output_path, | |
| 87 'Metagene-analysis-plot{0}'.format(count)) | |
| 88 for fmat in ('pdf', 'png'): | |
| 89 if fmat == 'png': | |
| 90 cmd = 'png(file="{0}_%1d.png", type="cairo")' | |
| 91 else: | |
| 92 cmd = 'pdf(file="{0}.pdf")' | |
| 93 run_rscript(cmd.format(plot_file)) | |
| 94 run_rscript('plotCDS({0},{1})'.format( | |
| 95 cds_args, 'lengths={}'.format(length))) | |
| 96 run_rscript('dev.off()') | |
| 97 for image in sorted( | |
| 98 glob.glob('{}*.png'.format(plot_file))): | |
| 99 html += '<p><img border="1" src="{0}" alt="{0}"></p>\n'.format( | |
| 100 os.path.basename(image)) | |
| 101 html += '<p><a href="{0}.pdf">PDF version</a></p>\n'.format( | |
| 102 os.path.basename(plot_file)) | |
| 103 run_rscript('save("ffCs", "riboDat", "fastaCDS", file="{}", ' | |
| 104 'compress=FALSE)'.format(rdata_save)) | |
| 105 | |
| 106 logging.debug('\n{:#^80}\n{}\n{:#^80}\n'.format( | |
| 107 ' R script for this session ', rscript, ' End R script ')) | |
| 108 | |
| 109 with open(os.path.join(output_path, 'metagene.R'), 'w') as r: | |
| 110 r.write(rscript) | |
| 111 | |
| 112 html += ('<h4>R script for this session</h4>\n' | |
| 113 '<p><a href="metagene.R">metagene.R</a></p>\n' | |
| 114 '<p>Next step: <em>Plot Ribosome profile</em></p>\n' | |
| 115 '</body>\n</html>\n') | |
| 116 | |
| 117 with open(html_file, 'w') as f: | |
| 118 f.write(html) | |
| 119 | |
| 120 if __name__ == '__main__': | |
| 121 | |
| 122 parser = argparse.ArgumentParser(description='Metagene analysis') | |
| 123 | |
| 124 # required arguments | |
| 125 flags = parser.add_argument_group('required arguments') | |
| 126 flags.add_argument('--rdata_load', required=True, | |
| 127 help='Saved riboSeqR data from Periodicity step.') | |
| 128 flags.add_argument('--selected_lengths', required=True, | |
| 129 help='Select frame lengths to filter. Comma-separated', | |
| 130 default='27') | |
| 131 flags.add_argument( | |
| 132 '--selected_frames', required=True, | |
| 133 help='Select frames corresponding to frame lengths. Comma-separated') | |
| 134 | |
| 135 flags.add_argument( | |
| 136 '--hit_mean', required=True, | |
| 137 help='Mean number of hits within the replicate group for filtering', | |
| 138 default='10') | |
| 139 | |
| 140 flags.add_argument( | |
| 141 '--unique_hit_mean', required=True, | |
| 142 help='Mean number of unique sequences within the replicate group ' | |
| 143 'for filtering', default='1') | |
| 144 | |
| 145 parser.add_argument( | |
| 146 '--rdata_save', help='File to write R data to (default: %(default)s)', | |
| 147 default='Metagene.rda') | |
| 148 | |
| 149 parser.add_argument( | |
| 150 '--ratio_check', | |
| 151 help='Check the ratios of the expected phase to maximal phase ' | |
| 152 'within the putative coding sequence (default: %(default)s)', | |
| 153 choices=['TRUE', 'FALSE'], default='TRUE') | |
| 154 | |
| 155 parser.add_argument( | |
| 156 '--plot_lengths', | |
| 157 help='Length of footprints to be plotted. Multiple values should be ' | |
| 158 'comma-separated. In that case, multiple plots will be produced' | |
| 159 '(default: %(default)s)', default='27') | |
| 160 | |
| 161 parser.add_argument( | |
| 162 '--min5p', | |
| 163 help='The distance upstream of the translation start to be plotted ' | |
| 164 '(default: %(default)s)', default='-20') | |
| 165 | |
| 166 parser.add_argument( | |
| 167 '--max5p', | |
| 168 help='The distance downstream of the translation start to be plotted ' | |
| 169 '(default: %(default)s)', default='200') | |
| 170 | |
| 171 parser.add_argument( | |
| 172 '--min3p', | |
| 173 help='The distance upstream of the translation end to be plotted ' | |
| 174 '(default: %(default)s)', default='-200') | |
| 175 | |
| 176 parser.add_argument( | |
| 177 '--max3p', | |
| 178 help='The distance downtream of the translation end to be plotted ' | |
| 179 '(default: %(default)s)', default='20') | |
| 180 | |
| 181 parser.add_argument( | |
| 182 '--cap', help='If given, caps the height of plotted values ' | |
| 183 '(default: %(default)s)') | |
| 184 | |
| 185 parser.add_argument('--plot_title', help='Title of the plot', default='') | |
| 186 parser.add_argument('--html_file', help='HTML file with reports') | |
| 187 parser.add_argument('--output_path', help='Directory to save output files') | |
| 188 parser.add_argument( | |
| 189 '--debug', help='Produce debug output', action='store_true') | |
| 190 | |
| 191 args = parser.parse_args() | |
| 192 if args.debug: | |
| 193 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | |
| 194 level=logging.DEBUG, stream=sys.stdout) | |
| 195 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | |
| 196 | |
| 197 if not os.path.exists(args.output_path): | |
| 198 os.mkdir(args.output_path) | |
| 199 | |
| 200 do_analysis( | |
| 201 rdata_load=args.rdata_load, selected_lengths=args.selected_lengths, | |
| 202 selected_frames=args.selected_frames, hit_mean=args.hit_mean, | |
| 203 unique_hit_mean=args.unique_hit_mean, ratio_check=args.ratio_check, | |
| 204 min5p=args.min5p, max5p=args.max5p, min3p=args.min3p, max3p=args.max3p, | |
| 205 cap=args.cap, plot_title=args.plot_title, | |
| 206 plot_lengths=args.plot_lengths, rdata_save=args.rdata_save, | |
| 207 html_file=args.html_file, output_path=args.output_path) | |
| 208 | |
| 209 logging.debug('Done!') |
