comparison tools/align_back_trans/align_back_trans.py @ 4:c8469274d136 draft

v0.0.8 Using Biopython 1.67 from Tool Shed or Conda package
author peterjc
date Wed, 10 May 2017 12:05:28 -0400
parents de803005027f
children 2c32e8a8990f
comparison
equal deleted inserted replaced
3:de803005027f 4:c8469274d136
16 16
17 See accompanying text file for licence details (MIT licence). 17 See accompanying text file for licence details (MIT licence).
18 """ 18 """
19 19
20 import sys 20 import sys
21
22 from Bio import AlignIO
23 from Bio import SeqIO
24
25 from Bio.Align import MultipleSeqAlignment
26 from Bio.Alphabet import generic_protein
27 from Bio.Data.CodonTable import ambiguous_generic_by_id
21 from Bio.Seq import Seq 28 from Bio.Seq import Seq
22 from Bio.Alphabet import generic_protein
23 from Bio.Align import MultipleSeqAlignment
24 from Bio import SeqIO
25 from Bio import AlignIO
26 from Bio.Data.CodonTable import ambiguous_generic_by_id
27 29
28 if "-v" in sys.argv or "--version" in sys.argv: 30 if "-v" in sys.argv or "--version" in sys.argv:
29 print "v0.0.7" 31 print "v0.0.7"
30 sys.exit(0) 32 sys.exit(0)
31 33
128 130
129 131
130 def alignment_back_translate(protein_alignment, nucleotide_records, key_function=None, gap=None, table=0): 132 def alignment_back_translate(protein_alignment, nucleotide_records, key_function=None, gap=None, table=0):
131 """Thread nucleotide sequences onto a protein alignment.""" 133 """Thread nucleotide sequences onto a protein alignment."""
132 # TODO - Separate arguments for protein and nucleotide gap characters? 134 # TODO - Separate arguments for protein and nucleotide gap characters?
133 if key_function is None:
134 key_function = lambda x: x
135 if gap is None: 135 if gap is None:
136 gap = "-" 136 gap = "-"
137 137
138 aligned = [] 138 aligned = []
139 for protein in protein_alignment: 139 try:
140 try: 140 if key_function is None:
141 nucleotide = nucleotide_records[key_function(protein.id)] 141 for protein in protein_alignment:
142 except KeyError: 142 nucleotide = nucleotide_records[protein.id]
143 raise ValueError("Could not find nucleotide sequence for protein %r" 143 aligned.append(sequence_back_translate(protein, nucleotide, gap, table))
144 % protein.id) 144 else:
145 aligned.append(sequence_back_translate(protein, nucleotide, gap, table)) 145 for protein in protein_alignment:
146 nucleotide = nucleotide_records[key_function(protein.id)]
147 aligned.append(sequence_back_translate(protein, nucleotide, gap, table))
148 except KeyError:
149 raise ValueError("Could not find nucleotide sequence for protein %r"
150 % protein.id)
151
146 return MultipleSeqAlignment(aligned) 152 return MultipleSeqAlignment(aligned)
147 153
148 154
149 if len(sys.argv) == 4: 155 if len(sys.argv) == 4:
150 align_format, prot_align_file, nuc_fasta_file = sys.argv[1:] 156 align_format, prot_align_file, nuc_fasta_file = sys.argv[1:]