Mercurial > repos > biotechcoder > riboseqr_wrapper
comparison riboseqr/metagene.py @ 0:e01de823e919 draft default tip
Uploaded
author | biotechcoder |
---|---|
date | Fri, 01 May 2015 05:41:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e01de823e919 |
---|---|
1 #!/usr/bin/env python | |
2 import os | |
3 import sys | |
4 import argparse | |
5 import logging | |
6 import rpy2.robjects as robjects | |
7 | |
8 import utils | |
9 | |
10 rscript = '' | |
11 R = robjects.r | |
12 | |
13 | |
14 def run_rscript(command=None): | |
15 """Run R command, log it, append to rscript""" | |
16 global rscript | |
17 if not command: | |
18 return | |
19 logging.debug(command) | |
20 rscript += '{}\n'.format(command) | |
21 R(command) | |
22 | |
23 | |
24 def do_analysis( | |
25 rdata_load='Periodicity.rda', selected_lengths='27', | |
26 selected_frames='', hit_mean='10', unique_hit_mean='1', | |
27 ratio_check='TRUE', min5p='-20', max5p='200', min3p='-200', max3p='20', | |
28 cap='', plot_title='', plot_lengths='27', rdata_save='Metagene.rda', | |
29 html_file='Metagene-report.html', output_path=os.getcwd()): | |
30 """Metagene analysis from saved periodicity R data file. """ | |
31 run_rscript('library(riboSeqR)') | |
32 run_rscript('load("{}")'.format(rdata_load)) | |
33 | |
34 logging.debug('fS\n{}\nfCs\n{}\n'.format(R['fS'], R['fCs'])) | |
35 options = {} | |
36 for key, value, rtype, rmode in ( | |
37 ('lengths', selected_lengths, 'int', 'charvector'), | |
38 ('frames', selected_frames, 'int', 'listvector'), | |
39 ('hit_mean', hit_mean, 'int', None), | |
40 ('unique_hit_mean', unique_hit_mean, 'int', None), | |
41 ('ratio_check', ratio_check, 'bool', None), | |
42 ('min5p', min5p, 'int', None), ('max5p', max5p, 'int', None), | |
43 ('min3p', min3p, 'int', None), ('max3p', max3p, 'int', None), | |
44 ('cap', cap, 'int', None), | |
45 ('plot_title', plot_title, 'str', 'charvector'), | |
46 ('plot_lengths', plot_lengths, 'int', 'list')): | |
47 options[key] = utils.process_args( | |
48 value, ret_type=rtype, ret_mode=rmode) | |
49 | |
50 cmd_args = """fCs, lengths={lengths}, | |
51 frames={frames}, hitMean={hit_mean}, | |
52 unqhitMean={unique_hit_mean}, fS=fS""".format(**options) | |
53 | |
54 if ratio_check == 'TRUE': | |
55 cmd_args += ', ratioCheck = TRUE' | |
56 | |
57 run_rscript('ffCs <- filterHits({})'.format(cmd_args)) | |
58 logging.debug("ffCs\n{}\n".format(R['ffCs'])) | |
59 | |
60 cds_args = ('coordinates=ffCs@CDS, riboDat=riboDat, min5p={min5p}, ' | |
61 'max5p={max5p}, min3p={min3p}, max3p={max3p}'.format(**options)) | |
62 | |
63 if options['cap']: | |
64 cds_args += ', cap={cap}'.format(**options) | |
65 | |
66 if options['plot_title']: | |
67 cds_args += ', main={plot_title}'.format(**options) | |
68 | |
69 html = '<h2>Metagene analysis - results</h2><hr>' | |
70 html += ('<p>Lengths of footprints used in analysis - <strong>' | |
71 '<code>{0}</code></strong><br>Lengths of footprints selected for ' | |
72 'the plot - <strong><code>{1}</code></strong>' | |
73 '</p>'.format(selected_lengths, plot_lengths)) | |
74 print 'plot lengths', options['plot_lengths'] | |
75 for count, length in enumerate(options['plot_lengths']): | |
76 count += 1 | |
77 plot_file = 'Metagene-analysis-plot{0}'.format(count) | |
78 for fmat in ('pdf', 'png'): | |
79 run_rscript('{0}(file="{1}")'.format( | |
80 fmat, os.path.join( | |
81 output_path, '{0}.{1}'.format(plot_file, fmat)))) | |
82 run_rscript('plotCDS({0},{1})'.format( | |
83 cds_args, 'lengths={}'.format(length))) | |
84 run_rscript('dev.off()') | |
85 | |
86 html += ('<h4>Length: {0}</h4><p><img border="1" src="{1}.png" ' | |
87 'alt="Metagene analysis plot" /><br><a href="{1}.pdf">' | |
88 'PDF version</a></p>'.format(length, plot_file)) | |
89 run_rscript( | |
90 'save("ffCs", "riboDat", "fastaCDS", file="{}", compress=FALSE)'.format(rdata_save)) | |
91 | |
92 logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format( | |
93 ' R script for this session ', rscript, ' End R script ')) | |
94 | |
95 with open(os.path.join(output_path, 'metagene.R'), 'w') as r: | |
96 r.write(rscript) | |
97 | |
98 html += ('<h4>R script for this session</h4>' | |
99 '<p><a href="metagene.R">metagene.R</a></p>' | |
100 '<p>Next step: <em>Plot Ribosome profile</em></p>') | |
101 | |
102 with open(html_file, 'w') as f: | |
103 f.write(html) | |
104 | |
105 if __name__ == '__main__': | |
106 | |
107 parser = argparse.ArgumentParser(description='Metagene analysis') | |
108 | |
109 # required arguments | |
110 flags = parser.add_argument_group('required arguments') | |
111 flags.add_argument('--rdata_load', required=True, | |
112 help='Saved riboSeqR data from Periodicity step.') | |
113 flags.add_argument('--selected_lengths', required=True, | |
114 help='Select frame lengths to filter. Comma-separated', | |
115 default='27') | |
116 flags.add_argument( | |
117 '--selected_frames', required=True, | |
118 help='Select frames corresponding to frame lengths. Comma-separated') | |
119 | |
120 flags.add_argument( | |
121 '--hit_mean', required=True, | |
122 help='Mean number of hits within the replicate group for filtering', | |
123 default='10') | |
124 | |
125 flags.add_argument( | |
126 '--unique_hit_mean', required=True, | |
127 help='Mean number of unique sequences within the replicate group ' | |
128 'for filtering', default='1') | |
129 | |
130 parser.add_argument( | |
131 '--rdata_save', help='File to write R data to (default: %(default)s)', | |
132 default='Metagene.rda') | |
133 | |
134 parser.add_argument( | |
135 '--ratio_check', | |
136 help='Check the ratios of the expected phase to maximal phase ' | |
137 'within the putative coding sequence (default: %(default)s)', | |
138 choices=['TRUE', 'FALSE'], default='TRUE') | |
139 | |
140 parser.add_argument( | |
141 '--plot_lengths', | |
142 help='Length of footprints to be plotted. Multiple values should be ' | |
143 'comma-separated. In that case, multiple plots will be produced' | |
144 '(default: %(default)s)', default='27') | |
145 | |
146 parser.add_argument( | |
147 '--min5p', | |
148 help='The distance upstream of the translation start to be plotted ' | |
149 '(default: %(default)s)', default='-20') | |
150 | |
151 parser.add_argument( | |
152 '--max5p', | |
153 help='The distance downstream of the translation start to be plotted ' | |
154 '(default: %(default)s)', default='200') | |
155 | |
156 parser.add_argument( | |
157 '--min3p', | |
158 help='The distance upstream of the translation end to be plotted ' | |
159 '(default: %(default)s)', default='-200') | |
160 | |
161 parser.add_argument( | |
162 '--max3p', | |
163 help='The distance downtream of the translation end to be plotted ' | |
164 '(default: %(default)s)', default='20') | |
165 | |
166 parser.add_argument( | |
167 '--cap', help='If given, caps the height of plotted values ' | |
168 '(default: %(default)s)') | |
169 | |
170 parser.add_argument('--plot_title', help='Title of the plot', default='') | |
171 parser.add_argument('--html_file', help='HTML file with reports') | |
172 parser.add_argument('--output_path', help='Directory to save output files') | |
173 parser.add_argument( | |
174 '--debug', help='Produce debug output', action='store_true') | |
175 | |
176 args = parser.parse_args() | |
177 if args.debug: | |
178 level = logging.DEBUG | |
179 else: | |
180 level = logging.INFO | |
181 | |
182 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | |
183 level=level, stream=sys.stdout) | |
184 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | |
185 | |
186 if not os.path.exists(args.output_path): | |
187 os.mkdir(args.output_path) | |
188 | |
189 do_analysis( | |
190 rdata_load=args.rdata_load, selected_lengths=args.selected_lengths, | |
191 selected_frames=args.selected_frames, hit_mean=args.hit_mean, | |
192 unique_hit_mean=args.unique_hit_mean, ratio_check=args.ratio_check, | |
193 min5p=args.min5p, max5p=args.max5p, min3p=args.min3p, max3p=args.max3p, | |
194 cap=args.cap, plot_title=args.plot_title, | |
195 plot_lengths=args.plot_lengths, rdata_save=args.rdata_save, | |
196 html_file=args.html_file, output_path=args.output_path) | |
197 | |
198 logging.info('Done!') |