Mercurial > repos > biotechcoder > riboseqr_wrapper
diff riboseqr/metagene.py @ 0:e01de823e919 draft default tip
Uploaded
author | biotechcoder |
---|---|
date | Fri, 01 May 2015 05:41:51 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/riboseqr/metagene.py Fri May 01 05:41:51 2015 -0400 @@ -0,0 +1,198 @@ +#!/usr/bin/env python +import os +import sys +import argparse +import logging +import rpy2.robjects as robjects + +import utils + +rscript = '' +R = robjects.r + + +def run_rscript(command=None): + """Run R command, log it, append to rscript""" + global rscript + if not command: + return + logging.debug(command) + rscript += '{}\n'.format(command) + R(command) + + +def do_analysis( + rdata_load='Periodicity.rda', selected_lengths='27', + selected_frames='', hit_mean='10', unique_hit_mean='1', + ratio_check='TRUE', min5p='-20', max5p='200', min3p='-200', max3p='20', + cap='', plot_title='', plot_lengths='27', rdata_save='Metagene.rda', + html_file='Metagene-report.html', output_path=os.getcwd()): + """Metagene analysis from saved periodicity R data file. """ + run_rscript('library(riboSeqR)') + run_rscript('load("{}")'.format(rdata_load)) + + logging.debug('fS\n{}\nfCs\n{}\n'.format(R['fS'], R['fCs'])) + options = {} + for key, value, rtype, rmode in ( + ('lengths', selected_lengths, 'int', 'charvector'), + ('frames', selected_frames, 'int', 'listvector'), + ('hit_mean', hit_mean, 'int', None), + ('unique_hit_mean', unique_hit_mean, 'int', None), + ('ratio_check', ratio_check, 'bool', None), + ('min5p', min5p, 'int', None), ('max5p', max5p, 'int', None), + ('min3p', min3p, 'int', None), ('max3p', max3p, 'int', None), + ('cap', cap, 'int', None), + ('plot_title', plot_title, 'str', 'charvector'), + ('plot_lengths', plot_lengths, 'int', 'list')): + options[key] = utils.process_args( + value, ret_type=rtype, ret_mode=rmode) + + cmd_args = """fCs, lengths={lengths}, + frames={frames}, hitMean={hit_mean}, + unqhitMean={unique_hit_mean}, fS=fS""".format(**options) + + if ratio_check == 'TRUE': + cmd_args += ', ratioCheck = TRUE' + + run_rscript('ffCs <- filterHits({})'.format(cmd_args)) + logging.debug("ffCs\n{}\n".format(R['ffCs'])) + + cds_args = ('coordinates=ffCs@CDS, riboDat=riboDat, min5p={min5p}, ' + 'max5p={max5p}, min3p={min3p}, max3p={max3p}'.format(**options)) + + if options['cap']: + cds_args += ', cap={cap}'.format(**options) + + if options['plot_title']: + cds_args += ', main={plot_title}'.format(**options) + + html = '<h2>Metagene analysis - results</h2><hr>' + html += ('<p>Lengths of footprints used in analysis - <strong>' + '<code>{0}</code></strong><br>Lengths of footprints selected for ' + 'the plot - <strong><code>{1}</code></strong>' + '</p>'.format(selected_lengths, plot_lengths)) + print 'plot lengths', options['plot_lengths'] + for count, length in enumerate(options['plot_lengths']): + count += 1 + plot_file = 'Metagene-analysis-plot{0}'.format(count) + for fmat in ('pdf', 'png'): + run_rscript('{0}(file="{1}")'.format( + fmat, os.path.join( + output_path, '{0}.{1}'.format(plot_file, fmat)))) + run_rscript('plotCDS({0},{1})'.format( + cds_args, 'lengths={}'.format(length))) + run_rscript('dev.off()') + + html += ('<h4>Length: {0}</h4><p><img border="1" src="{1}.png" ' + 'alt="Metagene analysis plot" /><br><a href="{1}.pdf">' + 'PDF version</a></p>'.format(length, plot_file)) + run_rscript( + 'save("ffCs", "riboDat", "fastaCDS", file="{}", compress=FALSE)'.format(rdata_save)) + + logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format( + ' R script for this session ', rscript, ' End R script ')) + + with open(os.path.join(output_path, 'metagene.R'), 'w') as r: + r.write(rscript) + + html += ('<h4>R script for this session</h4>' + '<p><a href="metagene.R">metagene.R</a></p>' + '<p>Next step: <em>Plot Ribosome profile</em></p>') + + with open(html_file, 'w') as f: + f.write(html) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Metagene analysis') + + # required arguments + flags = parser.add_argument_group('required arguments') + flags.add_argument('--rdata_load', required=True, + help='Saved riboSeqR data from Periodicity step.') + flags.add_argument('--selected_lengths', required=True, + help='Select frame lengths to filter. Comma-separated', + default='27') + flags.add_argument( + '--selected_frames', required=True, + help='Select frames corresponding to frame lengths. Comma-separated') + + flags.add_argument( + '--hit_mean', required=True, + help='Mean number of hits within the replicate group for filtering', + default='10') + + flags.add_argument( + '--unique_hit_mean', required=True, + help='Mean number of unique sequences within the replicate group ' + 'for filtering', default='1') + + parser.add_argument( + '--rdata_save', help='File to write R data to (default: %(default)s)', + default='Metagene.rda') + + parser.add_argument( + '--ratio_check', + help='Check the ratios of the expected phase to maximal phase ' + 'within the putative coding sequence (default: %(default)s)', + choices=['TRUE', 'FALSE'], default='TRUE') + + parser.add_argument( + '--plot_lengths', + help='Length of footprints to be plotted. Multiple values should be ' + 'comma-separated. In that case, multiple plots will be produced' + '(default: %(default)s)', default='27') + + parser.add_argument( + '--min5p', + help='The distance upstream of the translation start to be plotted ' + '(default: %(default)s)', default='-20') + + parser.add_argument( + '--max5p', + help='The distance downstream of the translation start to be plotted ' + '(default: %(default)s)', default='200') + + parser.add_argument( + '--min3p', + help='The distance upstream of the translation end to be plotted ' + '(default: %(default)s)', default='-200') + + parser.add_argument( + '--max3p', + help='The distance downtream of the translation end to be plotted ' + '(default: %(default)s)', default='20') + + parser.add_argument( + '--cap', help='If given, caps the height of plotted values ' + '(default: %(default)s)') + + parser.add_argument('--plot_title', help='Title of the plot', default='') + parser.add_argument('--html_file', help='HTML file with reports') + parser.add_argument('--output_path', help='Directory to save output files') + parser.add_argument( + '--debug', help='Produce debug output', action='store_true') + + args = parser.parse_args() + if args.debug: + level = logging.DEBUG + else: + level = logging.INFO + + logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', + level=level, stream=sys.stdout) + logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) + + if not os.path.exists(args.output_path): + os.mkdir(args.output_path) + + do_analysis( + rdata_load=args.rdata_load, selected_lengths=args.selected_lengths, + selected_frames=args.selected_frames, hit_mean=args.hit_mean, + unique_hit_mean=args.unique_hit_mean, ratio_check=args.ratio_check, + min5p=args.min5p, max5p=args.max5p, min3p=args.min3p, max3p=args.max3p, + cap=args.cap, plot_title=args.plot_title, + plot_lengths=args.plot_lengths, rdata_save=args.rdata_save, + html_file=args.html_file, output_path=args.output_path) + + logging.info('Done!')