Mercurial > repos > cpt > cpt_fasta_translate
comparison cpt_fasta_translate/fasta_translate.py @ 0:cb42bee49abb draft
Uploaded
author | cpt |
---|---|
date | Fri, 10 Jun 2022 08:47:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cb42bee49abb |
---|---|
1 #!/usr/bin/env python | |
2 import sys | |
3 import logging | |
4 import argparse | |
5 from Bio import SeqIO | |
6 from Bio.Data import CodonTable | |
7 | |
# `log` is the root logger (getLogger() with no name); basicConfig then
# attaches the default handler to it and sets the threshold to INFO.
log = logging.getLogger()
logging.basicConfig(level=logging.INFO)
10 | |
11 | |
def _translate_seq(record_id, seq, table, strip_stops, met):
    """Return the protein translation of *seq*, cut at the first stop codon.

    Args:
        record_id: Identifier used only in log messages.
        seq: Nucleotide ``Seq`` to translate.
        table: Translation table identifier passed to ``Seq.translate``.
        strip_stops: When false, a ``*`` is appended to the result.
        met: When true, the first residue (if any) is replaced by ``M``.
    """
    # Drop a trailing partial codon so the length is a multiple of three.
    overhang = len(seq) % 3
    if overhang != 0:
        seq = seq[0:-overhang]

    # See the Bio.Seq.Seq.translate documentation for the conditions a
    # valid CDS must meet. Try to translate as a CDS first, then fall back
    # to a plain translation if that fails.
    try:
        protein = seq.translate(table=table, cds=True)
    except CodonTable.TranslationError as cte:
        log.info("Translation issue at %s: %s", record_id, cte)
        protein = seq.translate(table=table, cds=False)

    # Cut at the first stop symbol, wherever it occurs in the protein.
    stop_at = str(protein).find("*")
    if stop_at != -1:
        log.info(
            "Trimming %s from %s to %s due to stop codons",
            record_id,
            len(seq),
            # Nucleotides kept: three per residue before the first stop.
            3 * stop_at,
        )
        protein = protein[0:stop_at]

    # Only substitute Met when there is a residue to replace; otherwise an
    # empty translation would turn into a spurious one-residue protein.
    if met and len(protein) > 0:
        protein = "M" + protein[1:]

    # Add stop to end if strip_stops=False.
    if not strip_stops:
        protein = protein + "*"

    return protein


def translate(fasta_file, target="protein", table=11, strip_stops=False, met=False):
    """Translate or transcribe each FASTA record and write it to stdout.

    Args:
        fasta_file: FASTA input handed to ``SeqIO.parse``.
        target: ``"protein"`` translates each record; any other value
            transcribes the record instead.
        table: Translation table identifier passed to ``Seq.translate``.
        strip_stops: When false, a ``*`` is appended to every protein.
        met: When true, the first residue of every protein becomes ``M``.

    Protein records whose final sequence is empty are not written.
    """
    # Stream the records instead of loading the whole file into a list.
    for record in SeqIO.parse(fasta_file, "fasta"):
        if target == "protein":
            record.seq = _translate_seq(
                record.id, record.seq, table, strip_stops, met
            )
            if len(record.seq) > 0:
                SeqIO.write(record, sys.stdout, "fasta")
        else:
            record.seq = record.seq.transcribe()
            SeqIO.write(record, sys.stdout, "fasta")
56 | |
57 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options and forward them as
    # keyword arguments to translate().
    parser = argparse.ArgumentParser(description="Translate fasta file")
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Fasta file")
    parser.add_argument(
        "--target",
        choices=["protein", "rna"],
        # Match translate()'s own default; without this an omitted --target
        # arrives as None and the records are transcribed instead.
        default="protein",
        help="Output type: translated protein or transcribed RNA",
    )
    parser.add_argument(
        "--table",
        type=int,
        default=11,
        help="Translation table to use",
        # NOTE(review): accepts IDs 1-22; confirm that every ID in this
        # range is defined by Bio.Data.CodonTable.
        choices=range(1, 23),
    )
    parser.add_argument(
        "--strip_stops", action="store_true", help="Remove stop characters"
    )
    parser.add_argument(
        "--met", action="store_true", help="Convert first residue to Met"
    )

    args = parser.parse_args()
    # Close the input handle opened by argparse once translation is done.
    with args.fasta_file:
        translate(**vars(args))