Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
comparison riboseqr/triplet.py @ 5:423ad61697c4
Bugfix 1: [triplet] Lengths (frameCounting) if given should be a range (not zero).
readingFrame function fails with subscript out of bounds.
Bugfix 2: [triplet] Check if transcript name in SAM matches the name in FASTA. Produce
an error message if it's not. This fixes the problem where an empty plot is
produced (no bars).
[ribosome_profile] - A proper error message is now produced if an invalid transcript name is
provided.
Updated test data
| author | Vimalkumar Velayudhan <vimalkumarvelayudhan@gmail.com> |
|---|---|
| date | Tue, 27 Oct 2015 12:21:50 +0000 |
| parents | c34c364ce75d |
| children |
comparison
equal
deleted
inserted
replaced
| 2:b2eb07000039 | 5:423ad61697c4 |
|---|---|
| 7 | 7 |
| 8 import utils | 8 import utils |
| 9 | 9 |
| 10 rscript = '' | 10 rscript = '' |
| 11 R = robjects.r | 11 R = robjects.r |
| 12 | |
| 13 | |
| 14 class TripletPeriodicityError(Exception): | |
| 15 pass | |
| 12 | 16 |
| 13 | 17 |
| 14 def run_rscript(command=None): | 18 def run_rscript(command=None): |
| 15 """Run R command, log it, append to rscript""" | 19 """Run R command, log it, append to rscript""" |
| 16 global rscript | 20 global rscript |
| 22 return output | 26 return output |
| 23 | 27 |
| 24 | 28 |
| 25 def find_periodicity( | 29 def find_periodicity( |
| 26 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', | 30 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', |
| 27 fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30', | 31 fasta_file=None, include_lengths='25:30', analyze_plot_lengths=None, |
| 28 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', | 32 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', |
| 29 html_file='Periodicity-report.html', output_path=os.getcwd()): | 33 html_file='Periodicity-report.html', output_path=os.getcwd()): |
| 30 """Plot triplet periodicity from prepared R data file. """ | 34 """Plot triplet periodicity from prepared R data file. """ |
| 31 logging.debug('{}'.format(R('sessionInfo()'))) | 35 logging.debug('{}'.format(R('sessionInfo()'))) |
| 32 cmd = 'suppressMessages(library(riboSeqR))' | 36 cmd = 'suppressMessages(library(riboSeqR))' |
| 33 run_rscript(cmd) | 37 run_rscript(cmd) |
| 34 | 38 |
| 35 logging.debug('Loading saved R data file') | 39 logging.debug('Loading saved R data file') |
| 36 cmd = 'load("{}")'.format(rdata_load) | 40 cmd = 'load("{}")'.format(rdata_load) |
| 37 run_rscript(cmd) | 41 run_rscript(cmd) |
| 42 | |
| 43 # get the first seqname (populated from input file which was generated from | |
| 44 # SAM) | |
| 45 refseq = R('levels(riboDat@riboGR$`RiboSeq file 1`@seqnames)[1]') | |
| 46 refseq = tuple(refseq)[0] | |
| 47 | |
| 48 fasta_valid = False | |
| 49 for line in open(fasta_file): | |
| 50 if line.startswith('>'): | |
| 51 seqname = line.strip().lstrip('>').split() | |
| 52 # get the FASTA header, split on space, and use the first column to | |
| 53 # compare. Multiple fields separated by space doesn't appear to be | |
| 54 # a problem for riboSeqR | |
| 55 if seqname[0] == refseq: | |
| 56 fasta_valid = True | |
| 57 break | |
| 58 if not fasta_valid: | |
| 59 raise TripletPeriodicityError( | |
| 60 'Transcript name in SAM alignment does not match with name in the transcriptome ' | |
| 61 'FASTA header. Please also check if the reads were aligned to this transcriptome ' | |
| 62 'and not the genome') | |
| 38 | 63 |
| 39 # R("""options(showTailLines=Inf)""") | 64 # R("""options(showTailLines=Inf)""") |
| 40 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), | 65 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), |
| 41 utils.process_args(stop_codons, ret_mode='charvector')) | 66 utils.process_args(stop_codons, ret_mode='charvector')) |
| 42 | 67 |
| 47 logging.debug('Potential coding sequences using start codon (ATG) and ' | 72 logging.debug('Potential coding sequences using start codon (ATG) and ' |
| 48 'stop codons TAG, TAA, TGA') | 73 'stop codons TAG, TAA, TGA') |
| 49 logging.debug('{}\n'.format(R['fastaCDS'])) | 74 logging.debug('{}\n'.format(R['fastaCDS'])) |
| 50 | 75 |
| 51 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) | 76 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) |
| 52 fS <- readingFrame(rC=fCs, lengths={1}); fS""".\ | 77 |
| 53 format(include_lengths, analyze_plot_lengths) | 78 """.format(include_lengths) |
| 79 if analyze_plot_lengths: | |
| 80 cmd += 'fS <- readingFrame(rC=fCs, lengths={0}); fS'.format(analyze_plot_lengths) | |
| 81 else: | |
| 82 cmd += 'fS <- readingFrame(rC=fCs); fS' | |
| 54 run_rscript(cmd) | 83 run_rscript(cmd) |
| 55 | 84 |
| 56 logging.debug('riboDat \n{}\n'.format(R['riboDat'])) | 85 logging.debug('riboDat \n{}\n'.format(R['riboDat'])) |
| 57 logging.debug('fCs\n{0}\n'.format(R['fCs'])) | 86 logging.debug('fCs\n{0}\n'.format(R['fCs'])) |
| 58 logging.debug('Reading frames for each n-mer\n{}'.format(R['fS'])) | 87 logging.debug('Reading frames for each n-mer\n{}'.format(R['fS'])) |
| 95 with open(html_file, 'w') as f: | 124 with open(html_file, 'w') as f: |
| 96 f.write(html) | 125 f.write(html) |
| 97 | 126 |
| 98 | 127 |
| 99 if __name__ == '__main__': | 128 if __name__ == '__main__': |
| 100 | 129 |
| 101 parser = argparse.ArgumentParser( | 130 parser = argparse.ArgumentParser( |
| 102 description='Plot triplet periodicity for different read lengths.') | 131 description='Plot triplet periodicity for different read lengths.') |
| 103 | 132 |
| 104 # required arguments | 133 # required arguments |
| 105 flags = parser.add_argument_group('required arguments') | 134 flags = parser.add_argument_group('required arguments') |
| 106 flags.add_argument( | 135 flags.add_argument( |
| 107 '--rdata_load', required=True, | 136 '--rdata_load', required=True, |
| 108 help='riboSeqR data from input preparation step (Prepare.rda)') | 137 help='riboSeqR data from input preparation step (Prepare.rda)') |
| 109 flags.add_argument( | 138 flags.add_argument( |
| 110 '--fasta_file', required=True, | 139 '--fasta_file', required=True, |
| 111 help='FASTA file of the reference transcriptome') | 140 help='FASTA file of the reference transcriptome') |
| 112 | 141 |
| 113 # optional arguments | 142 # optional arguments |
| 114 parser.add_argument( | 143 parser.add_argument( |
| 115 '--start_codons', | 144 '--start_codons', |
| 116 help='Start codon(s) to use. (default: %(default)s)', default='ATG') | 145 help='Start codon(s) to use. (default: %(default)s)', default='ATG') |
| 117 parser.add_argument( | 146 parser.add_argument( |
| 121 '--include_lengths', | 150 '--include_lengths', |
| 122 help='Lengths of ribosome footprints to be included ' | 151 help='Lengths of ribosome footprints to be included ' |
| 123 '(default: %(default)s)', default='25:30') | 152 '(default: %(default)s)', default='25:30') |
| 124 parser.add_argument( | 153 parser.add_argument( |
| 125 '--analyze_plot_lengths', | 154 '--analyze_plot_lengths', |
| 126 help='Lenghts of reads to be analyzed for frame-shift and plotting ' | 155 help='Lenghts of reads to be analyzed for frame-shift and plotting') |
| 127 '(default: %(default)s)', default='26:30') | |
| 128 parser.add_argument( | 156 parser.add_argument( |
| 129 '--text_legend', | 157 '--text_legend', |
| 130 help='Text for legend used in the plot (default: %(default)s)', | 158 help='Text for legend used in the plot (default: %(default)s)', |
| 131 default='Frame 0, Frame 1, Frame 2') | 159 default='Frame 0, Frame 1, Frame 2') |
| 132 parser.add_argument( | 160 parser.add_argument( |
| 135 parser.add_argument('--html_file', help='Output file for results (HTML)') | 163 parser.add_argument('--html_file', help='Output file for results (HTML)') |
| 136 parser.add_argument('--output_path', | 164 parser.add_argument('--output_path', |
| 137 help='Files are saved in this directory') | 165 help='Files are saved in this directory') |
| 138 parser.add_argument( | 166 parser.add_argument( |
| 139 '--debug', help='Produce debug output', action='store_true') | 167 '--debug', help='Produce debug output', action='store_true') |
| 140 | 168 |
| 141 args = parser.parse_args() | 169 args = parser.parse_args() |
| 142 if args.debug: | 170 if args.debug: |
| 143 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | 171 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', |
| 144 level=logging.DEBUG, stream=sys.stdout) | 172 level=logging.DEBUG, stream=sys.stdout) |
| 145 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | 173 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) |
