Mercurial > repos > biotechcoder > riboseqr_wrapper
comparison riboseqr/triplet.py @ 0:e01de823e919 draft default tip
Uploaded
author | biotechcoder |
---|---|
date | Fri, 01 May 2015 05:41:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e01de823e919 |
---|---|
1 #!/usr/bin/env python | |
2 import os | |
3 import sys | |
4 import argparse | |
5 import logging | |
6 import rpy2.robjects as robjects | |
7 | |
8 import utils | |
9 | |
10 rscript = '' | |
11 R = robjects.r | |
12 | |
13 | |
14 def run_rscript(command=None): | |
15 """Run R command, log it, append to rscript""" | |
16 global rscript | |
17 if not command: | |
18 return | |
19 logging.debug(command) | |
20 rscript += '{}\n'.format(command) | |
21 R(command) | |
22 | |
23 | |
24 def find_periodicity( | |
25 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', | |
26 fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30', | |
27 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', | |
28 html_file='Periodicity-report.html', output_path=os.getcwd()): | |
29 """Plot triplet periodicity from prepared R data file. """ | |
30 logging.debug('{}'.format(R('sessionInfo()'))) | |
31 cmd = 'library(riboSeqR)' | |
32 run_rscript(cmd) | |
33 | |
34 logging.debug('Loading saved R data file') | |
35 cmd = 'load("{}")'.format(rdata_load) | |
36 run_rscript(cmd) | |
37 | |
38 # R("""options(showTailLines=Inf)""") | |
39 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), | |
40 utils.process_args(stop_codons, ret_mode='charvector')) | |
41 | |
42 cmd = ('fastaCDS <- findCDS(fastaFile={0!r}, startCodon={1}, ' | |
43 'stopCodon={2})'.format(fasta_file, starts, stops)) | |
44 run_rscript(cmd) | |
45 | |
46 logging.info('Potential coding sequences using start codon (ATG) and stop ' | |
47 'codons TAG, TAA, TGA') | |
48 logging.info('{}\n'.format(R['fastaCDS'])) | |
49 | |
50 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) | |
51 fS <- readingFrame(rC=fCs, lengths={1}); fS""".\ | |
52 format(include_lengths, analyze_plot_lengths) | |
53 run_rscript(cmd) | |
54 | |
55 logging.debug('riboDat \n{}\n'.format(R['riboDat'])) | |
56 logging.debug('fCs\n{0}\n'.format(R['fCs'])) | |
57 logging.info('Reading frames for each n-mer\n{}'.format(R['fS'])) | |
58 | |
59 legend = utils.process_args(text_legend, ret_mode='charvector') | |
60 | |
61 for fmat in ('pdf', 'png'): | |
62 run_rscript('{0}(file="{1}")'.format(fmat, os.path.join( | |
63 output_path, '{0}.{1}'.format('Periodicity-plot', fmat)))) | |
64 run_rscript('plotFS(fS, legend.text = {0})'.format(legend)) | |
65 run_rscript('dev.off()') | |
66 | |
67 run_rscript('save("fCs", "fS", "riboDat", "fastaCDS", ' | |
68 'file="{}", compress=FALSE)'.format(rdata_save)) | |
69 | |
70 html = '<h2>Triplet periodicity - results</h2><hr>' | |
71 html += ('<h4>Results of reading frame analysis</h4>' | |
72 '<pre>{}</pre><br>'.format(R['fS'])) | |
73 html += ('<p>Lengths used for reading frame analysis - <code>{0}</code>' | |
74 '<br>Lengths selected for the plot - <code>{1}</code>' | |
75 '</p>'.format(include_lengths, analyze_plot_lengths)) | |
76 html += ('<p><img src="Periodicity-plot.png" border="1" ' | |
77 'alt="Triplet periodicity plot" />' | |
78 '<br><a href="Periodicity-plot.pdf">PDF version</a></p>') | |
79 | |
80 logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format( | |
81 ' R script for this session ', rscript, ' End R script ')) | |
82 | |
83 with open(os.path.join(output_path, 'periodicity.R'), 'w') as r: | |
84 r.write(rscript) | |
85 | |
86 html += ('<h4>R script for this session</h4>' | |
87 '<p><a href="periodicity.R">periodicity.R</a></p>' | |
88 '<p>Next step: <em>Metagene analysis</em></p>') | |
89 | |
90 with open(html_file, 'w') as f: | |
91 f.write(html) | |
92 | |
93 | |
94 if __name__ == '__main__': | |
95 | |
96 parser = argparse.ArgumentParser( | |
97 description='Plot triplet periodicity for different read lengths.') | |
98 | |
99 # required arguments | |
100 flags = parser.add_argument_group('required arguments') | |
101 flags.add_argument( | |
102 '--rdata_load', required=True, | |
103 help='riboSeqR data from input preparation step (Prepare.rda)') | |
104 flags.add_argument( | |
105 '--fasta_file', required=True, | |
106 help='FASTA file of the reference transcriptome') | |
107 | |
108 # optional arguments | |
109 parser.add_argument( | |
110 '--start_codons', | |
111 help='Start codon(s) to use. (default: %(default)s)', default='ATG') | |
112 parser.add_argument( | |
113 '--stop_codons', help='Stop codon(s) to use. (default: %(default)s)', | |
114 default='TAG, TAA, TGA') | |
115 parser.add_argument( | |
116 '--include_lengths', | |
117 help='Lengths of ribosome footprints to be included ' | |
118 '(default: %(default)s)', default='25:30') | |
119 parser.add_argument( | |
120 '--analyze_plot_lengths', | |
121 help='Lenghts of reads to be analyzed for frame-shift and plotting ' | |
122 '(default: %(default)s)', default='26:30') | |
123 parser.add_argument( | |
124 '--text_legend', | |
125 help='Text for legend used in the plot (default: %(default)s)', | |
126 default='Frame 0, Frame 1, Frame 2') | |
127 parser.add_argument( | |
128 '--rdata_save', help='File to write RData to (default: %(default)s)', | |
129 default='Periodicity.rda') | |
130 parser.add_argument('--html_file', help='Output file for results (HTML)') | |
131 parser.add_argument('--output_path', | |
132 help='Files are saved in this directory') | |
133 parser.add_argument( | |
134 '--debug', help='Produce debug output', action='store_true') | |
135 | |
136 args = parser.parse_args() | |
137 if args.debug: | |
138 level = logging.DEBUG | |
139 else: | |
140 level = logging.INFO | |
141 | |
142 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | |
143 level=level, stream=sys.stdout) | |
144 logging.info('Supplied Arguments\n{}\n'.format(vars(args))) | |
145 | |
146 if not os.path.exists(args.output_path): | |
147 os.mkdir(args.output_path) | |
148 | |
149 find_periodicity( | |
150 rdata_load=args.rdata_load, start_codons=args.start_codons, | |
151 stop_codons=args.stop_codons, fasta_file=args.fasta_file, | |
152 include_lengths=args.include_lengths, | |
153 analyze_plot_lengths=args.analyze_plot_lengths, | |
154 text_legend=args.text_legend, | |
155 rdata_save=args.rdata_save, html_file=args.html_file, | |
156 output_path=args.output_path) | |
157 logging.info("Done!") |