Mercurial > repos > biotechcoder > riboseqr_wrapper
diff riboseqr/triplet.py @ 0:e01de823e919 draft default tip
Uploaded
author | biotechcoder |
---|---|
date | Fri, 01 May 2015 05:41:51 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/riboseqr/triplet.py Fri May 01 05:41:51 2015 -0400 @@ -0,0 +1,157 @@ +#!/usr/bin/env python +import os +import sys +import argparse +import logging +import rpy2.robjects as robjects + +import utils + +rscript = '' +R = robjects.r + + +def run_rscript(command=None): + """Run R command, log it, append to rscript""" + global rscript + if not command: + return + logging.debug(command) + rscript += '{}\n'.format(command) + R(command) + + +def find_periodicity( + rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', + fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30', + text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', + html_file='Periodicity-report.html', output_path=os.getcwd()): + """Plot triplet periodicity from prepared R data file. """ + logging.debug('{}'.format(R('sessionInfo()'))) + cmd = 'library(riboSeqR)' + run_rscript(cmd) + + logging.debug('Loading saved R data file') + cmd = 'load("{}")'.format(rdata_load) + run_rscript(cmd) + + # R("""options(showTailLines=Inf)""") + starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), + utils.process_args(stop_codons, ret_mode='charvector')) + + cmd = ('fastaCDS <- findCDS(fastaFile={0!r}, startCodon={1}, ' + 'stopCodon={2})'.format(fasta_file, starts, stops)) + run_rscript(cmd) + + logging.info('Potential coding sequences using start codon (ATG) and stop ' + 'codons TAG, TAA, TGA') + logging.info('{}\n'.format(R['fastaCDS'])) + + cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) + fS <- readingFrame(rC=fCs, lengths={1}); fS""".\ + format(include_lengths, analyze_plot_lengths) + run_rscript(cmd) + + logging.debug('riboDat \n{}\n'.format(R['riboDat'])) + logging.debug('fCs\n{0}\n'.format(R['fCs'])) + logging.info('Reading frames for each n-mer\n{}'.format(R['fS'])) + + legend = utils.process_args(text_legend, ret_mode='charvector') + + for fmat in ('pdf', 'png'): + run_rscript('{0}(file="{1}")'.format(fmat, os.path.join( + output_path, '{0}.{1}'.format('Periodicity-plot', fmat)))) + run_rscript('plotFS(fS, legend.text = {0})'.format(legend)) + run_rscript('dev.off()') + + run_rscript('save("fCs", "fS", "riboDat", "fastaCDS", ' + 'file="{}", compress=FALSE)'.format(rdata_save)) + + html = '<h2>Triplet periodicity - results</h2><hr>' + html += ('<h4>Results of reading frame analysis</h4>' + '<pre>{}</pre><br>'.format(R['fS'])) + html += ('<p>Lengths used for reading frame analysis - <code>{0}</code>' + '<br>Lengths selected for the plot - <code>{1}</code>' + '</p>'.format(include_lengths, analyze_plot_lengths)) + html += ('<p><img src="Periodicity-plot.png" border="1" ' + 'alt="Triplet periodicity plot" />' + '<br><a href="Periodicity-plot.pdf">PDF version</a></p>') + + logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format( + ' R script for this session ', rscript, ' End R script ')) + + with open(os.path.join(output_path, 'periodicity.R'), 'w') as r: + r.write(rscript) + + html += ('<h4>R script for this session</h4>' + '<p><a href="periodicity.R">periodicity.R</a></p>' + '<p>Next step: <em>Metagene analysis</em></p>') + + with open(html_file, 'w') as f: + f.write(html) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='Plot triplet periodicity for different read lengths.') + + # required arguments + flags = parser.add_argument_group('required arguments') + flags.add_argument( + '--rdata_load', required=True, + help='riboSeqR data from input preparation step (Prepare.rda)') + flags.add_argument( + '--fasta_file', required=True, + help='FASTA file of the reference transcriptome') + + # optional arguments + parser.add_argument( + '--start_codons', + help='Start codon(s) to use. (default: %(default)s)', default='ATG') + parser.add_argument( + '--stop_codons', help='Stop codon(s) to use. (default: %(default)s)', + default='TAG, TAA, TGA') + parser.add_argument( + '--include_lengths', + help='Lengths of ribosome footprints to be included ' + '(default: %(default)s)', default='25:30') + parser.add_argument( + '--analyze_plot_lengths', + help='Lenghts of reads to be analyzed for frame-shift and plotting ' + '(default: %(default)s)', default='26:30') + parser.add_argument( + '--text_legend', + help='Text for legend used in the plot (default: %(default)s)', + default='Frame 0, Frame 1, Frame 2') + parser.add_argument( + '--rdata_save', help='File to write RData to (default: %(default)s)', + default='Periodicity.rda') + parser.add_argument('--html_file', help='Output file for results (HTML)') + parser.add_argument('--output_path', + help='Files are saved in this directory') + parser.add_argument( + '--debug', help='Produce debug output', action='store_true') + + args = parser.parse_args() + if args.debug: + level = logging.DEBUG + else: + level = logging.INFO + + logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', + level=level, stream=sys.stdout) + logging.info('Supplied Arguments\n{}\n'.format(vars(args))) + + if not os.path.exists(args.output_path): + os.mkdir(args.output_path) + + find_periodicity( + rdata_load=args.rdata_load, start_codons=args.start_codons, + stop_codons=args.stop_codons, fasta_file=args.fasta_file, + include_lengths=args.include_lengths, + analyze_plot_lengths=args.analyze_plot_lengths, + text_legend=args.text_legend, + rdata_save=args.rdata_save, html_file=args.html_file, + output_path=args.output_path) +logging.info("Done!")