comparison fasta_translate.py @ 1:4cbf9299712b draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:52 +0000
parents
children
comparison
equal deleted inserted replaced
0:cb42bee49abb 1:4cbf9299712b
1 #!/usr/bin/env python
2 import sys
3 import logging
4 import argparse
5 from Bio import SeqIO
6 from Bio.Data import CodonTable
7
# Configure the root logger once at import time so that INFO-level
# diagnostics (translation fallbacks, trimming notices) are emitted.
logging.basicConfig(level=logging.INFO)

# Root logger, shared by every function in this script.
log = logging.getLogger()
10
11
def translate(fasta_file, target="protein", table=11, strip_stops=False, met=False):
    """Translate or transcribe each FASTA record and write it to stdout.

    Records are processed one at a time and written to ``sys.stdout`` in
    FASTA format.

    Args:
        fasta_file: Anything ``SeqIO.parse`` accepts (a path or an open
            text handle).
        target: ``"protein"`` translates each record; any other value
            transcribes the record with ``Seq.transcribe()`` instead.
        table: Genetic code table identifier handed to ``Seq.translate``.
        strip_stops: When false, a ``*`` is appended to every protein.
        met: When true, the first residue of each non-empty protein is
            replaced with ``M``.

    Returns:
        None. Output is produced purely as a side effect on stdout.
    """
    # Stream the records: each one is handled independently, so there is
    # no need to hold the whole file in memory.
    for record in SeqIO.parse(fasta_file, "fasta"):
        if target != "protein":
            record.seq = record.seq.transcribe()
            SeqIO.write(record, sys.stdout, "fasta")
            continue

        # Drop trailing bases that do not make up a complete codon.
        remainder = len(record.seq) % 3
        if remainder:
            record.seq = record.seq[:-remainder]

        # See the Biopython ``Seq.translate`` documentation for the
        # conditions a sequence must satisfy to be accepted as a CDS.
        # First try to translate the sequence as a CDS, then fall back to
        # a plain translation if that is rejected.
        try:
            protein = record.seq.translate(table=table, cds=True)
        except CodonTable.TranslationError as cte:
            log.info("Translation issue at %s: %s", record.id, cte)
            protein = record.seq.translate(table=table, cds=False)

        # Cut the protein at the first stop codon, wherever it occurs.
        if "*" in protein:
            stop_index = str(protein).index("*")
            log.info(
                "Trimming %s from %s to %s due to stop codons",
                record.id,
                len(record.seq),
                # Nucleotide length actually kept: three bases per residue
                # that precedes the first stop.
                3 * stop_index,
            )
            protein = protein[:stop_index]

        # Replace the first residue before the stop marker is appended, and
        # only when there is a residue to replace; otherwise an empty
        # translation would be turned into a fabricated "M".
        if met and len(protein) > 0:
            protein = "M" + protein[1:]

        # Add the stop marker back unless the caller asked to strip it.
        if not strip_stops:
            protein = protein + "*"

        record.seq = protein
        # Only reachable as empty when strip_stops is set and nothing was
        # translated; such records are skipped.
        if len(record.seq) > 0:
            SeqIO.write(record, sys.stdout, "fasta")
56
57
if __name__ == "__main__":
    # Command-line entry point: every parsed option is forwarded to
    # translate() by keyword, so option names must match its parameters.
    parser = argparse.ArgumentParser(description="Translate fasta file")
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Fasta file")
    # The explicit default mirrors translate()'s own default. Without it
    # argparse would pass target=None, which overrides the function default
    # and silently selects the transcription branch.
    parser.add_argument(
        "--target",
        choices=["protein", "rna"],
        default="protein",
        help="Output type: translate to protein or transcribe to RNA",
    )
    parser.add_argument(
        "--table",
        type=int,
        default=11,
        help="Translation table to use",
        choices=range(1, 23),
    )
    parser.add_argument(
        "--strip_stops", action="store_true", help="Remove stop characters"
    )
    parser.add_argument(
        "--met", action="store_true", help="Convert first residue to Met"
    )

    args = parser.parse_args()
    translate(**vars(args))