annotate riboseqr/metagene.py @ 0:e01de823e919 draft default tip

Uploaded
author biotechcoder
date Fri, 01 May 2015 05:41:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
1 #!/usr/bin/env python
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
2 import os
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
3 import sys
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
4 import argparse
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
5 import logging
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
6 import rpy2.robjects as robjects
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
7
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
8 import utils
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
9
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
# Handle on the embedded R interpreter: calling it evaluates a string of R
# code, indexing it (R['name']) looks up an object in the R session.
R = robjects.r
# Transcript of every R command executed so far; run_rscript appends to it.
rscript = ''
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
12
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
13
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
def run_rscript(command=None):
    """Execute a single R command and keep a record of it.

    A truthy ``command`` is logged at debug level, appended (followed by a
    newline) to the module-level ``rscript`` transcript and finally evaluated
    through ``R``.  A falsy ``command`` (``None`` or an empty string) is
    ignored.  Nothing is returned in either case.
    """
    global rscript
    if command:
        logging.debug(command)
        # Recorded before evaluation, so a command that fails in R is still
        # present in the transcript.
        rscript = rscript + '{}\n'.format(command)
        R(command)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
22
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
23
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
def do_analysis(
        rdata_load='Periodicity.rda', selected_lengths='27',
        selected_frames='', hit_mean='10', unique_hit_mean='1',
        ratio_check='TRUE', min5p='-20', max5p='200', min3p='-200', max3p='20',
        cap='', plot_title='', plot_lengths='27', rdata_save='Metagene.rda',
        html_file='Metagene-report.html', output_path=os.getcwd()):
    """Metagene analysis from saved periodicity R data file.

    Steps, all executed through ``run_rscript`` so they end up in the
    session transcript:

    1. load ``riboSeqR`` and the R data file ``rdata_load`` (expected to
       provide the R objects ``fS`` and ``fCs``; ``riboDat`` and
       ``fastaCDS`` are used/saved later as well);
    2. ``ffCs <- filterHits(...)`` using the selected lengths/frames and the
       hit-mean thresholds;
    3. for every entry of ``plot_lengths`` call ``plotCDS`` twice, once on a
       ``pdf`` and once on a ``png`` device, writing
       ``Metagene-analysis-plot<N>.<ext>`` into ``output_path``;
    4. save ``ffCs``, ``riboDat`` and ``fastaCDS`` to ``rdata_save``;
    5. write the transcript to ``<output_path>/metagene.R`` and an HTML
       report to ``html_file``.

    All analysis parameters are passed as strings and converted by
    ``utils.process_args`` (presumably into R-literal text; see that helper
    for the exact rules).

    :param rdata_load: R data file saved by the periodicity step.
    :param selected_lengths: footprint lengths to filter on.
    :param selected_frames: frames corresponding to ``selected_lengths``.
    :param hit_mean: value for ``filterHits(hitMean=...)``.
    :param unique_hit_mean: value for ``filterHits(unqhitMean=...)``.
    :param ratio_check: ``'TRUE'`` adds ``ratioCheck = TRUE`` to the
        ``filterHits`` call; any other value leaves the argument out.
    :param min5p: ``plotCDS`` ``min5p`` argument.
    :param max5p: ``plotCDS`` ``max5p`` argument.
    :param min3p: ``plotCDS`` ``min3p`` argument.
    :param max3p: ``plotCDS`` ``max3p`` argument.
    :param cap: optional ``plotCDS`` ``cap`` argument; omitted when the
        processed value is falsy.
    :param plot_title: optional plot title (``main=``); omitted when the
        processed value is falsy.
    :param plot_lengths: footprint lengths to plot, one plot per length.
    :param rdata_save: file the resulting R objects are saved to.
    :param html_file: path of the HTML report.
    :param output_path: directory for the plots and ``metagene.R``.  The
        default is the working directory at the time this module was
        imported (default arguments are evaluated once).
    """
    run_rscript('library(riboSeqR)')
    run_rscript('load("{}")'.format(rdata_load))

    logging.debug('fS\n{}\nfCs\n{}\n'.format(R['fS'], R['fCs']))

    # Convert every string argument with the project helper; the result is
    # what gets substituted into the R command templates below.
    options = {}
    for key, value, rtype, rmode in (
            ('lengths', selected_lengths, 'int', 'charvector'),
            ('frames', selected_frames, 'int', 'listvector'),
            ('hit_mean', hit_mean, 'int', None),
            ('unique_hit_mean', unique_hit_mean, 'int', None),
            ('ratio_check', ratio_check, 'bool', None),
            ('min5p', min5p, 'int', None), ('max5p', max5p, 'int', None),
            ('min3p', min3p, 'int', None), ('max3p', max3p, 'int', None),
            ('cap', cap, 'int', None),
            ('plot_title', plot_title, 'str', 'charvector'),
            ('plot_lengths', plot_lengths, 'int', 'list')):
        options[key] = utils.process_args(
            value, ret_type=rtype, ret_mode=rmode)

    # Arguments of filterHits().  The line breaks only affect the layout of
    # the recorded R script; R ignores whitespace between call arguments.
    cmd_args = (
        'fCs, lengths={lengths},\n'
        'frames={frames}, hitMean={hit_mean},\n'
        'unqhitMean={unique_hit_mean}, fS=fS'.format(**options))

    if ratio_check == 'TRUE':
        cmd_args += ', ratioCheck = TRUE'

    run_rscript('ffCs <- filterHits({})'.format(cmd_args))
    logging.debug("ffCs\n{}\n".format(R['ffCs']))

    # Arguments shared by every plotCDS() call; only `lengths` varies.
    cds_args = ('coordinates=ffCs@CDS, riboDat=riboDat, min5p={min5p}, '
                'max5p={max5p}, min3p={min3p}, max3p={max3p}'.format(**options))

    if options['cap']:
        cds_args += ', cap={cap}'.format(**options)

    if options['plot_title']:
        cds_args += ', main={plot_title}'.format(**options)

    html = '<h2>Metagene analysis - results</h2><hr>'
    html += ('<p>Lengths of footprints used in analysis - <strong>'
             '<code>{0}</code></strong><br>Lengths of footprints selected for '
             'the plot - <strong><code>{1}</code></strong>'
             '</p>'.format(selected_lengths, plot_lengths))
    # Diagnostic only: goes through logging instead of a bare print so it
    # does not pollute stdout and the module stays valid on Python 3.
    logging.debug('plot lengths %s', options['plot_lengths'])
    for count, length in enumerate(options['plot_lengths'], 1):
        plot_file = 'Metagene-analysis-plot{0}'.format(count)
        # Each plot is rendered twice: PDF for download, PNG for the report.
        for fmat in ('pdf', 'png'):
            run_rscript('{0}(file="{1}")'.format(
                fmat, os.path.join(
                    output_path, '{0}.{1}'.format(plot_file, fmat))))
            run_rscript('plotCDS({0},{1})'.format(
                cds_args, 'lengths={}'.format(length)))
            run_rscript('dev.off()')

        # Image/link targets are relative file names, i.e. the report is
        # expected to be viewed alongside the files in output_path.
        html += ('<h4>Length: {0}</h4><p><img border="1" src="{1}.png" '
                 'alt="Metagene analysis plot" /><br><a href="{1}.pdf">'
                 'PDF version</a></p>'.format(length, plot_file))
    run_rscript(
        'save("ffCs", "riboDat", "fastaCDS", file="{}", '
        'compress=FALSE)'.format(rdata_save))

    logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format(
        ' R script for this session ', rscript, ' End R script '))

    with open(os.path.join(output_path, 'metagene.R'), 'w') as r:
        r.write(rscript)

    html += ('<h4>R script for this session</h4>'
             '<p><a href="metagene.R">metagene.R</a></p>'
             '<p>Next step: <em>Plot Ribosome profile</em></p>')

    with open(html_file, 'w') as f:
        f.write(html)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
104
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
def build_parser():
    """Build the command-line parser for the metagene analysis step.

    All values are kept as strings; they are converted later inside
    ``do_analysis``.  ``--html_file`` and ``--output_path`` default to the
    same values ``do_analysis`` uses, so omitting them no longer hands
    ``None`` to the file-system calls.

    :return: a configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description='Metagene analysis')

    # required arguments
    flags = parser.add_argument_group('required arguments')
    flags.add_argument('--rdata_load', required=True,
                       help='Saved riboSeqR data from Periodicity step.')
    # NOTE: `default` has no effect on the options below because they are
    # also `required`; both are kept so the command line stays unchanged.
    flags.add_argument('--selected_lengths', required=True,
                       help='Select frame lengths to filter. Comma-separated',
                       default='27')
    flags.add_argument(
        '--selected_frames', required=True,
        help='Select frames corresponding to frame lengths. Comma-separated')

    flags.add_argument(
        '--hit_mean', required=True,
        help='Mean number of hits within the replicate group for filtering',
        default='10')

    flags.add_argument(
        '--unique_hit_mean', required=True,
        help='Mean number of unique sequences within the replicate group '
             'for filtering', default='1')

    parser.add_argument(
        '--rdata_save', help='File to write R data to (default: %(default)s)',
        default='Metagene.rda')

    parser.add_argument(
        '--ratio_check',
        help='Check the ratios of the expected phase to maximal phase '
             'within the putative coding sequence (default: %(default)s)',
        choices=['TRUE', 'FALSE'], default='TRUE')

    parser.add_argument(
        '--plot_lengths',
        help='Length of footprints to be plotted. Multiple values should be '
             'comma-separated. In that case, multiple plots will be produced '
             '(default: %(default)s)', default='27')

    parser.add_argument(
        '--min5p',
        help='The distance upstream of the translation start to be plotted '
             '(default: %(default)s)', default='-20')

    parser.add_argument(
        '--max5p',
        help='The distance downstream of the translation start to be plotted '
             '(default: %(default)s)', default='200')

    parser.add_argument(
        '--min3p',
        help='The distance upstream of the translation end to be plotted '
             '(default: %(default)s)', default='-200')

    parser.add_argument(
        '--max3p',
        help='The distance downstream of the translation end to be plotted '
             '(default: %(default)s)', default='20')

    parser.add_argument(
        '--cap', help='If given, caps the height of plotted values '
                      '(default: %(default)s)')

    parser.add_argument('--plot_title', help='Title of the plot', default='')
    parser.add_argument('--html_file', help='HTML file with reports',
                        default='Metagene-report.html')
    parser.add_argument('--output_path', help='Directory to save output files',
                        default=os.getcwd())
    parser.add_argument(
        '--debug', help='Produce debug output', action='store_true')
    return parser


def main(argv=None):
    """Parse the command line, set up logging and run the analysis.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    level = logging.DEBUG if args.debug else logging.INFO

    logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s',
                        level=level, stream=sys.stdout)
    logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))

    # makedirs (rather than mkdir) so a nested output directory also works.
    if not os.path.isdir(args.output_path):
        os.makedirs(args.output_path)

    do_analysis(
        rdata_load=args.rdata_load, selected_lengths=args.selected_lengths,
        selected_frames=args.selected_frames, hit_mean=args.hit_mean,
        unique_hit_mean=args.unique_hit_mean, ratio_check=args.ratio_check,
        min5p=args.min5p, max5p=args.max5p, min3p=args.min3p, max3p=args.max3p,
        cap=args.cap, plot_title=args.plot_title,
        plot_lengths=args.plot_lengths, rdata_save=args.rdata_save,
        html_file=args.html_file, output_path=args.output_path)

    logging.info('Done!')


if __name__ == '__main__':
    main()