Mercurial > repos > bvalot > fasta_filter_0_1
comparison fasta_filter_to_region.py @ 0:bc23da9d464c draft default tip
planemo upload for repository http://172.20.124.12:3000/bvalot3/PythonScript commit 9676573ee48ce5343600ab45cd3ac1a6adddabe4
author | bvalot |
---|---|
date | Tue, 14 Jun 2022 08:15:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bc23da9d464c |
---|---|
1 #!/usr/bin/env python3 | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """Return a filter fasta file for region of accession""" | |
5 | |
6 import argparse | |
7 import sys | |
8 | |
9 from Bio import SeqIO | |
10 | |
11 desc = "Filter a fasta file based on region." | |
12 command = argparse.ArgumentParser( | |
13 prog="fasta_filter_to_region", | |
14 description=desc, | |
15 usage="%(prog)s [options] fasta accessions:start-stop", | |
16 ) | |
17 command.add_argument( | |
18 "-o", | |
19 "--output", | |
20 default=sys.stdout, | |
21 type=argparse.FileType("w"), | |
22 nargs="?", | |
23 help="Output result to, default:stdout", | |
24 ) | |
25 command.add_argument("fasta", type=argparse.FileType("r"), help="Fasta file to filter") | |
26 command.add_argument( | |
27 "access", | |
28 metavar="accessions", | |
29 type=str, | |
30 nargs="+", | |
31 help="List of region to extract, ex: accession:start-stop", | |
32 ) | |
33 | |
34 if __name__ == "__main__": | |
35 """Performed job on execution script""" | |
36 args = command.parse_args() | |
37 output = args.output | |
38 access = {} | |
39 for acc in args.access: | |
40 access.setdefault(acc.split(":")[0], []).append(acc.split(":")[1].split("-")) | |
41 | |
42 for seq in SeqIO.parse(args.fasta, "fasta"): | |
43 accs = access.get(seq.id) | |
44 if seq.id not in access: | |
45 continue | |
46 for acc in access.get(seq.id): | |
47 sub = None | |
48 if int(acc[0]) > int(acc[1]): | |
49 # reverse | |
50 sub = seq[int(acc[1]) - 1: int(acc[0])] | |
51 sub.seq = sub.seq.reverse_complement() | |
52 sub.id = seq.id + "_" + acc[0] + "_" + acc[1] + "_reverse" | |
53 else: | |
54 sub = seq[int(acc[0]) - 1: int(acc[1])] | |
55 sub.id = seq.id + "_" + acc[0] + "_" + acc[1] | |
56 SeqIO.write(sub, output, "fasta") | |
57 access.pop(seq.id) | |
58 if len(access) > 0: | |
59 sys.stderr.write("Accessions not found : " + str(" - ").join(access) + "\n") |