Mercurial > repos > biotechcoder > riboseqr_wrapper
comparison riboseqr/prepare.py @ 0:e01de823e919 draft default tip
Uploaded
| author | biotechcoder | 
|---|---|
| date | Fri, 01 May 2015 05:41:51 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:e01de823e919 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import os | |
| 3 import sys | |
| 4 import argparse | |
| 5 import logging | |
| 6 import rpy2.robjects as robjects | |
| 7 | |
| 8 import utils | |
| 9 | |
| 10 rscript = '' | |
| 11 R = robjects.r | |
| 12 | |
| 13 | |
| 14 def run_rscript(command=None): | |
| 15 """Run R command, log it, append to rscript""" | |
| 16 global rscript | |
| 17 if not command: | |
| 18 return | |
| 19 logging.debug(command) | |
| 20 rscript += '{}\n'.format(command) | |
| 21 R(command) | |
| 22 | |
| 23 | |
| 24 def prep_riboseqr_input(sam_file, output_file): | |
| 25 """Generate input file for riboSeqR from SAM format file.""" | |
| 26 with open(output_file, 'w') as f: | |
| 27 for line in open(sam_file): | |
| 28 if line.startswith('@'): | |
| 29 continue | |
| 30 line = line.split() | |
| 31 flag = line[1] | |
| 32 | |
| 33 if flag != '0': | |
| 34 continue | |
| 35 # make start 0-indexed, sam alignments are 1-indexed | |
| 36 start = int(line[3]) - 1 | |
| 37 (name, sequence) = (line[2], line[9]) | |
| 38 f.write('"+"\t"{0}"\t{1}\t"{2}"\n'.format(name, start, sequence)) | |
| 39 | |
| 40 | |
| 41 def batch_process(sam_files, seq_type, output_path): | |
| 42 """Batch process the conversion of SAM format files -> riboSeqR format | |
| 43 input files. | |
| 44 | |
| 45 Files are saved with file names corresponding to their sequence type. | |
| 46 | |
| 47 """ | |
| 48 outputs = [] | |
| 49 prefix = '{}' | |
| 50 if seq_type == 'riboseq': | |
| 51 prefix = 'RiboSeq file {}' | |
| 52 elif seq_type == 'rnaseq': | |
| 53 prefix = 'RNASeq file {}' | |
| 54 | |
| 55 for count, fname in enumerate(sam_files): | |
| 56 count += 1 | |
| 57 out_file = os.path.join(output_path, prefix.format(count)) | |
| 58 logging.debug('Processing: {}'.format(fname)) | |
| 59 logging.debug('Writing output to: {}'.format(out_file)) | |
| 60 prep_riboseqr_input(fname, out_file) | |
| 61 outputs.append(out_file) | |
| 62 return outputs | |
| 63 | |
| 64 | |
| 65 def generate_ribodata(ribo_files='', rna_files='', replicate_names='', | |
| 66 seqnames='', rdata_save='Prepare.rda', sam_format=True, | |
| 67 html_file='Prepare-report.html', output_path=os.getcwd()): | |
| 68 """Prepares Ribo and RNA seq data in the format required for riboSeqR. Calls | |
| 69 the readRibodata function of riboSeqR and saves the result objects in an | |
| 70 R data file which can be used as input for the next step. | |
| 71 | |
| 72 """ | |
| 73 input_ribo_files = utils.process_args(ribo_files, ret_mode='list') | |
| 74 logging.debug('Found {} Ribo-Seq files'.format(len(input_ribo_files))) | |
| 75 logging.debug(input_ribo_files) | |
| 76 | |
| 77 input_rna_files = [] | |
| 78 if rna_files: | |
| 79 input_rna_files = utils.process_args(rna_files, ret_mode='list') | |
| 80 logging.debug('Found {} RNA-Seq files'.format(len(input_rna_files))) | |
| 81 logging.debug(input_rna_files) | |
| 82 | |
| 83 replicates = utils.process_args(replicate_names, ret_mode='charvector') | |
| 84 logging.debug('Replicates: {}\n'.format(replicates)) | |
| 85 | |
| 86 if sam_format: | |
| 87 ribo_seq_files = batch_process(input_ribo_files, 'riboseq', output_path) | |
| 88 else: | |
| 89 ribo_seq_files = input_ribo_files | |
| 90 | |
| 91 html = '<h2>Prepare riboSeqR input - results</h2><hr>' | |
| 92 if len(ribo_seq_files): | |
| 93 html += '<h4>Generated riboSeqR format input files ' \ | |
| 94 '<em>(RiboSeq)</em></h4><p>' | |
| 95 for fname in ribo_seq_files: | |
| 96 html += '<a href="{0}">{0}</a><br>'.format( | |
| 97 os.path.basename(fname)) | |
| 98 html += '</p>' | |
| 99 | |
| 100 rna_seq_files = [] | |
| 101 if len(input_rna_files): | |
| 102 if sam_format: | |
| 103 rna_seq_files = batch_process( | |
| 104 input_rna_files, 'rnaseq', output_path) | |
| 105 else: | |
| 106 rna_seq_files = input_rna_files | |
| 107 | |
| 108 if len(rna_seq_files): | |
| 109 html += ('<h4>Generated riboSeqR format input files ' | |
| 110 '<em>(RNASeq)</em></h4><p>') | |
| 111 for fname in rna_seq_files: | |
| 112 html += '<a href="{0}">{0}</a><br>'.format( | |
| 113 os.path.basename(fname)) | |
| 114 html += '</p>' | |
| 115 | |
| 116 input_seqnames = utils.process_args(seqnames, ret_mode='charvector') | |
| 117 options = {'ribo_seq_files': 'c({})'.format(str(ribo_seq_files)[1:-1]), | |
| 118 'rna_seq_files': 'c({})'.format(str(rna_seq_files)[1:-1]), | |
| 119 'input_replicates': replicates, | |
| 120 'input_seqnames': input_seqnames} | |
| 121 | |
| 122 logging.debug('{}'.format(R('sessionInfo()'))) | |
| 123 | |
| 124 script = '' | |
| 125 cmd = 'library(riboSeqR)' | |
| 126 run_rscript(cmd) | |
| 127 script += '{}\n'.format(cmd) | |
| 128 | |
| 129 if len(rna_seq_files): | |
| 130 cmd_args = ('riboFiles={ribo_seq_files}, ' | |
| 131 'rnaFiles={rna_seq_files}'.format(**options)) | |
| 132 else: | |
| 133 cmd_args = 'riboFiles={ribo_seq_files}'.format(**options) | |
| 134 | |
| 135 if input_seqnames: | |
| 136 cmd_args += ', seqnames={input_seqnames}'.format(**options) | |
| 137 if replicates: | |
| 138 cmd_args += ', replicates={input_replicates}'.format(**options) | |
| 139 else: | |
| 140 cmd_args += ', replicates=c("")' | |
| 141 cmd = 'riboDat <- readRibodata({0})'.format(cmd_args) | |
| 142 run_rscript(cmd) | |
| 143 script += '{}\n'.format(cmd) | |
| 144 | |
| 145 ribo_data = R['riboDat'] | |
| 146 logging.debug('riboDat \n{}\n'.format(ribo_data)) | |
| 147 cmd = 'save("riboDat", file="{}", compress=FALSE)'.format(rdata_save) | |
| 148 run_rscript(cmd) | |
| 149 script += '{}\n'.format(cmd) | |
| 150 | |
| 151 msg = '\n{:#^80}\n{}\n{:#^80}\n'.format( | |
| 152 ' R script for this session ', script, ' End R script ') | |
| 153 logging.debug(msg) | |
| 154 | |
| 155 with open(os.path.join(output_path, 'prepare.R'), 'w') as r: | |
| 156 r.write(script) | |
| 157 | |
| 158 html += ('<h4>R script for this session</h4>' | |
| 159 '<p><a href="prepare.R">prepare.R</a></p>' | |
| 160 '<p>Next step: <em>Triplet periodicity</em></p>') | |
| 161 | |
| 162 with open(html_file, 'w') as f: | |
| 163 f.write(html) | |
| 164 | |
| 165 return ribo_data | |
| 166 | |
| 167 | |
| 168 if __name__ == '__main__': | |
| 169 | |
| 170 description = ( | |
| 171 'Prepare riboSeqR input file from SAM format RNA/Ribo-Seq alignment.') | |
| 172 parser = argparse.ArgumentParser(description=description) | |
| 173 | |
| 174 # required arguments | |
| 175 flags = parser.add_argument_group('required arguments') | |
| 176 flags.add_argument('--ribo_files', required=True, | |
| 177 help='List of Ribo-Seq files, comma-separated') | |
| 178 # optional arguments | |
| 179 parser.add_argument('--rna_files', | |
| 180 help='List of RNA-Seq files. Comma-separated') | |
| 181 parser.add_argument('--replicate_names', | |
| 182 help='Replicate names, comma-separated') | |
| 183 parser.add_argument('--seqnames', | |
| 184 help='Transcript (seqname) names to be read') | |
| 185 parser.add_argument('--rdata_save', | |
| 186 help='File to write RData to (default: %(default)s)', | |
| 187 default='Prepare.rda') | |
| 188 parser.add_argument( | |
| 189 '--sam_format', | |
| 190 help='Flag. Input is in SAM format', action='store_true') | |
| 191 parser.add_argument('--html_file', help='Output file for results (HTML)') | |
| 192 parser.add_argument('--output_path', | |
| 193 help='Files are saved in this directory') | |
| 194 parser.add_argument('--debug', help='Flag. Produce debug output', | |
| 195 action='store_true') | |
| 196 args = parser.parse_args() | |
| 197 | |
| 198 if args.debug: | |
| 199 level = logging.DEBUG | |
| 200 else: | |
| 201 level = logging.INFO | |
| 202 | |
| 203 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | |
| 204 level=level, stream=sys.stdout) | |
| 205 logging.debug('Supplied Arguments: {}'.format(vars(args))) | |
| 206 | |
| 207 if not os.path.exists(args.output_path): | |
| 208 os.mkdir(args.output_path) | |
| 209 | |
| 210 generate_ribodata( | |
| 211 ribo_files=args.ribo_files, rna_files=args.rna_files, | |
| 212 replicate_names=args.replicate_names, seqnames=args.seqnames, | |
| 213 rdata_save=args.rdata_save, | |
| 214 sam_format=args.sam_format, html_file=args.html_file, | |
| 215 output_path=args.output_path | |
| 216 ) | |
| 217 logging.debug('Done') | 
