comparison fasta_filter_to_region.py @ 0:bc23da9d464c draft default tip

planemo upload for repository http://172.20.124.12:3000/bvalot3/PythonScript commit 9676573ee48ce5343600ab45cd3ac1a6adddabe4
author bvalot
date Tue, 14 Jun 2022 08:15:55 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bc23da9d464c
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 """Return a filter fasta file for region of accession"""
5
6 import argparse
7 import sys
8
9 from Bio import SeqIO
10
11 desc = "Filter a fasta file based on region."
12 command = argparse.ArgumentParser(
13 prog="fasta_filter_to_region",
14 description=desc,
15 usage="%(prog)s [options] fasta accessions:start-stop",
16 )
17 command.add_argument(
18 "-o",
19 "--output",
20 default=sys.stdout,
21 type=argparse.FileType("w"),
22 nargs="?",
23 help="Output result to, default:stdout",
24 )
25 command.add_argument("fasta", type=argparse.FileType("r"), help="Fasta file to filter")
26 command.add_argument(
27 "access",
28 metavar="accessions",
29 type=str,
30 nargs="+",
31 help="List of region to extract, ex: accession:start-stop",
32 )
33
34 if __name__ == "__main__":
35 """Performed job on execution script"""
36 args = command.parse_args()
37 output = args.output
38 access = {}
39 for acc in args.access:
40 access.setdefault(acc.split(":")[0], []).append(acc.split(":")[1].split("-"))
41
42 for seq in SeqIO.parse(args.fasta, "fasta"):
43 accs = access.get(seq.id)
44 if seq.id not in access:
45 continue
46 for acc in access.get(seq.id):
47 sub = None
48 if int(acc[0]) > int(acc[1]):
49 # reverse
50 sub = seq[int(acc[1]) - 1: int(acc[0])]
51 sub.seq = sub.seq.reverse_complement()
52 sub.id = seq.id + "_" + acc[0] + "_" + acc[1] + "_reverse"
53 else:
54 sub = seq[int(acc[0]) - 1: int(acc[1])]
55 sub.id = seq.id + "_" + acc[0] + "_" + acc[1]
56 SeqIO.write(sub, output, "fasta")
57 access.pop(seq.id)
58 if len(access) > 0:
59 sys.stderr.write("Accessions not found : " + str(" - ").join(access) + "\n")