annotate riboseqr/triplet.py @ 0:e01de823e919 draft default tip

Uploaded
author biotechcoder
date Fri, 01 May 2015 05:41:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
1 #!/usr/bin/env python
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
2 import os
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
3 import sys
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
4 import argparse
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
5 import logging
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
6 import rpy2.robjects as robjects
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
7
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
8 import utils
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
9
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
10 rscript = ''
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
11 R = robjects.r
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
12
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
13
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
14 def run_rscript(command=None):
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
15 """Run R command, log it, append to rscript"""
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
16 global rscript
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
17 if not command:
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
18 return
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
19 logging.debug(command)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
20 rscript += '{}\n'.format(command)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
21 R(command)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
22
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
23
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
24 def find_periodicity(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
25 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
26 fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
27 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
28 html_file='Periodicity-report.html', output_path=os.getcwd()):
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
29 """Plot triplet periodicity from prepared R data file. """
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
30 logging.debug('{}'.format(R('sessionInfo()')))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
31 cmd = 'library(riboSeqR)'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
32 run_rscript(cmd)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
33
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
34 logging.debug('Loading saved R data file')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
35 cmd = 'load("{}")'.format(rdata_load)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
36 run_rscript(cmd)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
37
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
38 # R("""options(showTailLines=Inf)""")
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
39 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'),
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
40 utils.process_args(stop_codons, ret_mode='charvector'))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
41
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
42 cmd = ('fastaCDS <- findCDS(fastaFile={0!r}, startCodon={1}, '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
43 'stopCodon={2})'.format(fasta_file, starts, stops))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
44 run_rscript(cmd)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
45
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
46 logging.info('Potential coding sequences using start codon (ATG) and stop '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
47 'codons TAG, TAA, TGA')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
48 logging.info('{}\n'.format(R['fastaCDS']))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
49
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
50 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0})
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
51 fS <- readingFrame(rC=fCs, lengths={1}); fS""".\
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
52 format(include_lengths, analyze_plot_lengths)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
53 run_rscript(cmd)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
54
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
55 logging.debug('riboDat \n{}\n'.format(R['riboDat']))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
56 logging.debug('fCs\n{0}\n'.format(R['fCs']))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
57 logging.info('Reading frames for each n-mer\n{}'.format(R['fS']))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
58
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
59 legend = utils.process_args(text_legend, ret_mode='charvector')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
60
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
61 for fmat in ('pdf', 'png'):
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
62 run_rscript('{0}(file="{1}")'.format(fmat, os.path.join(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
63 output_path, '{0}.{1}'.format('Periodicity-plot', fmat))))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
64 run_rscript('plotFS(fS, legend.text = {0})'.format(legend))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
65 run_rscript('dev.off()')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
66
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
67 run_rscript('save("fCs", "fS", "riboDat", "fastaCDS", '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
68 'file="{}", compress=FALSE)'.format(rdata_save))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
69
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
70 html = '<h2>Triplet periodicity - results</h2><hr>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
71 html += ('<h4>Results of reading frame analysis</h4>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
72 '<pre>{}</pre><br>'.format(R['fS']))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
73 html += ('<p>Lengths used for reading frame analysis - <code>{0}</code>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
74 '<br>Lengths selected for the plot - <code>{1}</code>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
75 '</p>'.format(include_lengths, analyze_plot_lengths))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
76 html += ('<p><img src="Periodicity-plot.png" border="1" '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
77 'alt="Triplet periodicity plot" />'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
78 '<br><a href="Periodicity-plot.pdf">PDF version</a></p>')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
79
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
80 logging.info('\n{:#^80}\n{}\n{:#^80}\n'.format(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
81 ' R script for this session ', rscript, ' End R script '))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
82
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
83 with open(os.path.join(output_path, 'periodicity.R'), 'w') as r:
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
84 r.write(rscript)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
85
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
86 html += ('<h4>R script for this session</h4>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
87 '<p><a href="periodicity.R">periodicity.R</a></p>'
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
88 '<p>Next step: <em>Metagene analysis</em></p>')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
89
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
90 with open(html_file, 'w') as f:
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
91 f.write(html)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
92
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
93
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
94 if __name__ == '__main__':
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
95
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
96 parser = argparse.ArgumentParser(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
97 description='Plot triplet periodicity for different read lengths.')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
98
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
99 # required arguments
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
100 flags = parser.add_argument_group('required arguments')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
101 flags.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
102 '--rdata_load', required=True,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
103 help='riboSeqR data from input preparation step (Prepare.rda)')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
104 flags.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
105 '--fasta_file', required=True,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
106 help='FASTA file of the reference transcriptome')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
107
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
108 # optional arguments
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
109 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
110 '--start_codons',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
111 help='Start codon(s) to use. (default: %(default)s)', default='ATG')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
112 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
113 '--stop_codons', help='Stop codon(s) to use. (default: %(default)s)',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
114 default='TAG, TAA, TGA')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
115 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
116 '--include_lengths',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
117 help='Lengths of ribosome footprints to be included '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
118 '(default: %(default)s)', default='25:30')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
119 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
120 '--analyze_plot_lengths',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
121 help='Lenghts of reads to be analyzed for frame-shift and plotting '
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
122 '(default: %(default)s)', default='26:30')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
123 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
124 '--text_legend',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
125 help='Text for legend used in the plot (default: %(default)s)',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
126 default='Frame 0, Frame 1, Frame 2')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
127 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
128 '--rdata_save', help='File to write RData to (default: %(default)s)',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
129 default='Periodicity.rda')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
130 parser.add_argument('--html_file', help='Output file for results (HTML)')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
131 parser.add_argument('--output_path',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
132 help='Files are saved in this directory')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
133 parser.add_argument(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
134 '--debug', help='Produce debug output', action='store_true')
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
135
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
136 args = parser.parse_args()
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
137 if args.debug:
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
138 level = logging.DEBUG
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
139 else:
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
140 level = logging.INFO
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
141
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
142 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s',
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
143 level=level, stream=sys.stdout)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
144 logging.info('Supplied Arguments\n{}\n'.format(vars(args)))
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
145
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
146 if not os.path.exists(args.output_path):
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
147 os.mkdir(args.output_path)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
148
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
149 find_periodicity(
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
150 rdata_load=args.rdata_load, start_codons=args.start_codons,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
151 stop_codons=args.stop_codons, fasta_file=args.fasta_file,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
152 include_lengths=args.include_lengths,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
153 analyze_plot_lengths=args.analyze_plot_lengths,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
154 text_legend=args.text_legend,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
155 rdata_save=args.rdata_save, html_file=args.html_file,
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
156 output_path=args.output_path)
e01de823e919 Uploaded
biotechcoder
parents:
diff changeset
157 logging.info("Done!")