comparison riboseqr/metagene.py @ 0:c34c364ce75d

First commit
author Vimalkumar Velayudhan <vimal@biotechcoder.com>
date Tue, 21 Jul 2015 14:48:39 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c34c364ce75d
1 #!/usr/bin/env python
2 import os
3 import sys
4 import glob
5 import argparse
6 import logging
7 import rpy2.robjects as robjects
8
9 import utils
10
11 rscript = ''
12 R = robjects.r
13
14
15 def run_rscript(command=None):
16 """Run R command, log it, append to rscript"""
17 global rscript
18 if not command:
19 return
20 logging.debug(command)
21 rscript += '{}\n'.format(command)
22 output = R(command)
23 return output
24
25
26 def do_analysis(
27 rdata_load='Periodicity.rda', selected_lengths='27',
28 selected_frames='', hit_mean='10', unique_hit_mean='1',
29 ratio_check='TRUE', min5p='-20', max5p='200', min3p='-200', max3p='20',
30 cap='', plot_title='', plot_lengths='27', rdata_save='Metagene.rda',
31 html_file='Metagene-report.html', output_path=os.getcwd()):
32 """Metagene analysis from saved periodicity R data file. """
33 run_rscript('suppressMessages(library(riboSeqR))')
34 run_rscript('load("{}")'.format(rdata_load))
35
36 logging.debug('fS\n{}\nfCs\n{}\n'.format(R['fS'], R['fCs']))
37 options = {}
38 for key, value, rtype, rmode in (
39 ('lengths', selected_lengths, 'int', 'charvector'),
40 ('frames', selected_frames, 'int', 'listvector'),
41 ('hit_mean', hit_mean, 'int', None),
42 ('unique_hit_mean', unique_hit_mean, 'int', None),
43 ('ratio_check', ratio_check, 'bool', None),
44 ('min5p', min5p, 'int', None), ('max5p', max5p, 'int', None),
45 ('min3p', min3p, 'int', None), ('max3p', max3p, 'int', None),
46 ('cap', cap, 'int', None),
47 ('plot_title', plot_title, 'str', 'charvector'),
48 ('plot_lengths', plot_lengths, 'int', 'list')):
49 options[key] = utils.process_args(
50 value, ret_type=rtype, ret_mode=rmode)
51
52 cmd_args = """fCs, lengths={lengths},
53 frames={frames}, hitMean={hit_mean},
54 unqhitMean={unique_hit_mean}, fS=fS""".format(**options)
55
56 if ratio_check == 'TRUE':
57 cmd_args += ', ratioCheck = TRUE'
58
59 run_rscript('ffCs <- filterHits({})'.format(cmd_args))
60 logging.debug("ffCs\n{}\n".format(R['ffCs']))
61
62 cds_args = ('coordinates=ffCs@CDS, riboDat=riboDat, min5p={min5p}, '
63 'max5p={max5p}, min3p={min3p}, max3p={max3p}'.format(**options))
64
65 if options['cap']:
66 cds_args += ', cap={cap}'.format(**options)
67
68 if options['plot_title']:
69 cds_args += ', main={plot_title}'.format(**options)
70
71 html = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
72 <html>
73 <head>
74 <title>Metagene Analysis - Report</title>
75 </head>
76 <body>
77 """
78 html += '<h2>Metagene analysis - results</h2>\n<hr>\n'
79 html += ('<p>\nLengths of footprints used in analysis - <strong>'
80 '<code>{0}</code></strong><br>\nLengths of footprints '
81 'selected for the plot - <strong><code>{1}</code></strong>'
82 '\n</p>\n'.format(selected_lengths, plot_lengths))
83 for count, length in enumerate(options['plot_lengths']):
84 count += 1
85 html += '<h3>Length: {0}</h3>\n'.format(length)
86 plot_file = os.path.join(output_path,
87 'Metagene-analysis-plot{0}'.format(count))
88 for fmat in ('pdf', 'png'):
89 if fmat == 'png':
90 cmd = 'png(file="{0}_%1d.png", type="cairo")'
91 else:
92 cmd = 'pdf(file="{0}.pdf")'
93 run_rscript(cmd.format(plot_file))
94 run_rscript('plotCDS({0},{1})'.format(
95 cds_args, 'lengths={}'.format(length)))
96 run_rscript('dev.off()')
97 for image in sorted(
98 glob.glob('{}*.png'.format(plot_file))):
99 html += '<p><img border="1" src="{0}" alt="{0}"></p>\n'.format(
100 os.path.basename(image))
101 html += '<p><a href="{0}.pdf">PDF version</a></p>\n'.format(
102 os.path.basename(plot_file))
103 run_rscript('save("ffCs", "riboDat", "fastaCDS", file="{}", '
104 'compress=FALSE)'.format(rdata_save))
105
106 logging.debug('\n{:#^80}\n{}\n{:#^80}\n'.format(
107 ' R script for this session ', rscript, ' End R script '))
108
109 with open(os.path.join(output_path, 'metagene.R'), 'w') as r:
110 r.write(rscript)
111
112 html += ('<h4>R script for this session</h4>\n'
113 '<p><a href="metagene.R">metagene.R</a></p>\n'
114 '<p>Next step: <em>Plot Ribosome profile</em></p>\n'
115 '</body>\n</html>\n')
116
117 with open(html_file, 'w') as f:
118 f.write(html)
119
120 if __name__ == '__main__':
121
122 parser = argparse.ArgumentParser(description='Metagene analysis')
123
124 # required arguments
125 flags = parser.add_argument_group('required arguments')
126 flags.add_argument('--rdata_load', required=True,
127 help='Saved riboSeqR data from Periodicity step.')
128 flags.add_argument('--selected_lengths', required=True,
129 help='Select frame lengths to filter. Comma-separated',
130 default='27')
131 flags.add_argument(
132 '--selected_frames', required=True,
133 help='Select frames corresponding to frame lengths. Comma-separated')
134
135 flags.add_argument(
136 '--hit_mean', required=True,
137 help='Mean number of hits within the replicate group for filtering',
138 default='10')
139
140 flags.add_argument(
141 '--unique_hit_mean', required=True,
142 help='Mean number of unique sequences within the replicate group '
143 'for filtering', default='1')
144
145 parser.add_argument(
146 '--rdata_save', help='File to write R data to (default: %(default)s)',
147 default='Metagene.rda')
148
149 parser.add_argument(
150 '--ratio_check',
151 help='Check the ratios of the expected phase to maximal phase '
152 'within the putative coding sequence (default: %(default)s)',
153 choices=['TRUE', 'FALSE'], default='TRUE')
154
155 parser.add_argument(
156 '--plot_lengths',
157 help='Length of footprints to be plotted. Multiple values should be '
158 'comma-separated. In that case, multiple plots will be produced'
159 '(default: %(default)s)', default='27')
160
161 parser.add_argument(
162 '--min5p',
163 help='The distance upstream of the translation start to be plotted '
164 '(default: %(default)s)', default='-20')
165
166 parser.add_argument(
167 '--max5p',
168 help='The distance downstream of the translation start to be plotted '
169 '(default: %(default)s)', default='200')
170
171 parser.add_argument(
172 '--min3p',
173 help='The distance upstream of the translation end to be plotted '
174 '(default: %(default)s)', default='-200')
175
176 parser.add_argument(
177 '--max3p',
178 help='The distance downtream of the translation end to be plotted '
179 '(default: %(default)s)', default='20')
180
181 parser.add_argument(
182 '--cap', help='If given, caps the height of plotted values '
183 '(default: %(default)s)')
184
185 parser.add_argument('--plot_title', help='Title of the plot', default='')
186 parser.add_argument('--html_file', help='HTML file with reports')
187 parser.add_argument('--output_path', help='Directory to save output files')
188 parser.add_argument(
189 '--debug', help='Produce debug output', action='store_true')
190
191 args = parser.parse_args()
192 if args.debug:
193 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s',
194 level=logging.DEBUG, stream=sys.stdout)
195 logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))
196
197 if not os.path.exists(args.output_path):
198 os.mkdir(args.output_path)
199
200 do_analysis(
201 rdata_load=args.rdata_load, selected_lengths=args.selected_lengths,
202 selected_frames=args.selected_frames, hit_mean=args.hit_mean,
203 unique_hit_mean=args.unique_hit_mean, ratio_check=args.ratio_check,
204 min5p=args.min5p, max5p=args.max5p, min3p=args.min3p, max3p=args.max3p,
205 cap=args.cap, plot_title=args.plot_title,
206 plot_lengths=args.plot_lengths, rdata_save=args.rdata_save,
207 html_file=args.html_file, output_path=args.output_path)
208
209 logging.debug('Done!')