Mercurial > repos > cpt > cpt_blasttab_dice_filter
comparison blasttab_dice_filter.py @ 5:99baf3ee2a2b draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:40:11 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:2ed4d8ee01b9 | 5:99baf3ee2a2b |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import logging | |
4 | |
logging.basicConfig(level=logging.INFO)
# NOTE(review): the logger name looks inherited from the blasttab2gff3 tool;
# it is kept unchanged so any logging configuration keyed on it still applies.
log = logging.getLogger(name="blasttab2gff3")

__doc__ = """
Filter Blast TSV (25-column tabular) output by Dice score.

For every row the score is computed as
2 * (identical matches) / (query length + subject length);
rows scoring below the requested minimum are dropped, the rest are printed
unchanged.
"""
12 | |
13 | |
def blasttsv2gff3(blasttsv, min_dice=50):
    """Yield the Blast TSV rows whose Dice score is at least ``min_dice``.

    The score of a row is ``2 * identical / (query_len + subject_len)``,
    taken from columns 15, 23 and 24 of the column layout listed below.

    Args:
        blasttsv: Iterable of tab-separated lines (e.g. an open text file).
        min_dice: Inclusive lower bound a row's score must reach to be kept.

    Yields:
        Each passing line, with leading/trailing newline characters removed.

    Raises:
        IndexError: A non-blank row has fewer than 24 columns.
        ValueError: One of the three columns used is not numeric.
        ZeroDivisionError: Query and subject lengths sum to zero.

    NOTE(review): the default of 50 is on a different scale from the
    command-line default of 0.5. Presumably the identical-match count never
    exceeds either sequence length, in which case the score is at most 1 and
    the default here rejects every row -- confirm the intended default.
    """
    # 01 Query Seq-id (ID of your sequence)
    # 02 Subject Seq-id (ID of the database hit)
    # 03 Percentage of identical matches
    # 04 Alignment length
    # 05 Number of mismatches
    # 06 Number of gap openings
    # 07 Start of alignment in query
    # 08 End of alignment in query
    # 09 Start of alignment in subject (database hit)
    # 10 End of alignment in subject (database hit)
    # 11 Expectation value (E-value)
    # 12 Bit score
    # 13 All subject Seq-id(s), separated by a ';'
    # 14 Raw score
    # 15 Number of identical matches
    # 16 Number of positive-scoring matches
    # 17 Total number of gaps
    # 18 Percentage of positive-scoring matches
    # 19 Query frame
    # 20 Subject frame
    # 21 Aligned part of query sequence
    # 22 Aligned part of subject sequence
    # 23 Query sequence length
    # 24 Subject sequence length
    # 25 All subject title(s), separated by a '<>'

    for raw in blasttsv:
        line = raw.strip("\n")
        if not line.strip():
            # Blank lines (e.g. a trailing empty line) hold no hit; skipping
            # them avoids an IndexError on the column lookups below.
            continue

        fields = line.split("\t")
        identical = float(fields[14])  # column 15
        combined_length = float(fields[22]) + float(fields[23])  # columns 23 + 24
        dice = 2 * identical / combined_length

        if dice >= min_dice:
            yield line
48 | |
49 | |
if __name__ == "__main__":
    # Command-line entry point: read a Blast TSV file and print the rows that
    # reach the requested minimum Dice score.
    parser = argparse.ArgumentParser(
        description="Filter Blast TSV rows by minimum dice score"
    )
    parser.add_argument(
        "blasttsv", type=argparse.FileType("r"), help="Blast TSV Output"
    )
    parser.add_argument(
        "--min_dice", type=float, help="Minimum dice score", default=0.5
    )
    args = parser.parse_args()

    # Pass the options by name rather than **vars(args), so adding a CLI flag
    # later cannot break the call with an unexpected keyword argument.
    with args.blasttsv as handle:
        for line in blasttsv2gff3(handle, min_dice=args.min_dice):
            print(line)