0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import logging
|
|
4 import argparse
|
|
5 from Bio import SeqIO
|
|
6 from Bio.Data import CodonTable
|
|
7
|
|
# Diagnostics are emitted through the root logger at INFO level.  The default
# basicConfig handler writes to stderr, so log lines never mix with the FASTA
# records that this script prints on stdout.
logging.basicConfig(level=logging.INFO)
log: logging.Logger = logging.getLogger()
|
|
10
|
|
11
|
|
def _translate_record(record, table, strip_stops, met):
    """Return the protein translation of ``record.seq``.

    The sequence is first translated as a complete CDS; if Biopython rejects
    it (see the ``cds`` argument of ``Bio.Seq.Seq.translate`` for the exact
    conditions), it is translated again as a plain nucleotide sequence and
    truncated at the first stop codon.

    Args:
        record: A ``SeqRecord`` holding a nucleotide sequence.
        table: NCBI translation table id passed to ``Seq.translate``.
        strip_stops: When false, a ``*`` is appended to the protein.
        met: When true, the first residue is replaced with ``M``.

    Returns:
        The translated sequence (possibly empty when ``strip_stops`` is true).
    """
    nucleotides = record.seq

    # Drop trailing bases that do not form a complete codon.
    overhang = len(nucleotides) % 3
    if overhang:
        nucleotides = nucleotides[:-overhang]

    try:
        protein = nucleotides.translate(table=table, cds=True)
    except CodonTable.TranslationError as cte:
        log.info("Translation issue at %s: %s", record.id, cte)
        protein = nucleotides.translate(table=table, cds=False)

    # Truncate at the first stop codon.  The reported target length is derived
    # from the position of that stop, not from the untrimmed translation.
    stop_index = str(protein).find("*")
    if stop_index != -1:
        log.info(
            "Trimming %s from %s to %s due to stop codons",
            record.id,
            len(nucleotides),
            3 * stop_index,
        )
        protein = protein[:stop_index]

    # Force the initiator residue only when there is a residue to replace, and
    # do it before the stop is appended so that "*" is never overwritten and
    # an empty translation is never turned into a lone "M".
    if met and len(protein) > 0:
        protein = "M" + protein[1:]

    if not strip_stops:
        protein = protein + "*"

    return protein


def translate(fasta_file, target="protein", table=11, strip_stops=False, met=False):
    """Translate or transcribe every record of a FASTA file to stdout.

    Args:
        fasta_file: File handle (or path) readable by ``SeqIO.parse``.
        target: ``"protein"`` translates each record; any other value
            transcribes the record to RNA instead.
        table: NCBI translation table id used for protein translation.
        strip_stops: When false, a trailing ``*`` is added to each protein.
        met: When true, the first residue of each protein is set to ``M``.

    Records are processed one at a time and written to ``sys.stdout`` in FASTA
    format.  Proteins that end up empty are not written.
    """
    # Stream the records instead of loading the whole file into memory.
    for record in SeqIO.parse(fasta_file, "fasta"):
        if target == "protein":
            record.seq = _translate_record(record, table, strip_stops, met)
            if len(record.seq) > 0:
                SeqIO.write(record, sys.stdout, "fasta")
        else:
            record.seq = record.seq.transcribe()
            SeqIO.write(record, sys.stdout, "fasta")
|
|
56
|
|
57
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to translate()
    # as keyword arguments (argument names match the function's parameters).
    parser = argparse.ArgumentParser(description="Translate fasta file")
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Fasta file")
    # Without an explicit default argparse would pass target=None, which
    # overrides translate()'s own "protein" default and silently selects the
    # transcription branch.
    parser.add_argument(
        "--target",
        choices=["protein", "rna"],
        default="protein",
        help="Output type: translated protein or transcribed RNA",
    )
    parser.add_argument(
        "--table",
        type=int,
        default=11,
        help="Translation table to use",
        choices=range(1, 23),
    )
    parser.add_argument(
        "--strip_stops", action="store_true", help="Remove stop characters"
    )
    parser.add_argument(
        "--met", action="store_true", help="Convert first residue to Met"
    )

    args = parser.parse_args()
    translate(**vars(args))
|