comparison blasttab_dice_filter.py @ 5:99baf3ee2a2b draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:40:11 +0000
parents
children
comparison
equal deleted inserted replaced
4:2ed4d8ee01b9 5:99baf3ee2a2b
1 #!/usr/bin/env python
2 import argparse
3 import logging
4
logging.basicConfig(level=logging.INFO)
# NOTE(review): the logger name looks inherited from the blasttab2gff3 tool;
# it is kept unchanged so any existing log output stays identical.
log = logging.getLogger(name="blasttab2gff3")

__doc__ = """
Filter a 25-column Blast TSV file, keeping only the hits whose Dice score
(2 * identical matches / (query length + subject length)) is at least a
given threshold.
"""
12
13
def blasttsv2gff3(blasttsv, min_dice=50):
    """Yield the Blast TSV rows whose Dice score is at least ``min_dice``.

    Despite its name, this function does not produce GFF3; it only filters
    rows. The score of a row is computed as
    ``2 * identical_matches / (query_length + subject_length)`` from columns
    15, 23 and 24 of the 25-column layout listed below.

    Args:
        blasttsv: Iterable of tab-separated lines, e.g. an open text file.
        min_dice: Inclusive lower bound on the score.
            NOTE(review): the default of 50 looks inconsistent with the 0.5
            used by this file's command-line entry point; presumably the
            score is a 0-1 fraction, in which case the default rejects every
            row. Confirm before relying on the default.

    Yields:
        Each retained line with leading/trailing newline characters removed.

    Raises:
        IndexError: If a data row has fewer than 24 columns.
        ValueError: If column 15, 23 or 24 is not numeric.
    """
    # 01 Query Seq-id (ID of your sequence)
    # 02 Subject Seq-id (ID of the database hit)
    # 03 Percentage of identical matches
    # 04 Alignment length
    # 05 Number of mismatches
    # 06 Number of gap openings
    # 07 Start of alignment in query
    # 08 End of alignment in query
    # 09 Start of alignment in subject (database hit)
    # 10 End of alignment in subject (database hit)
    # 11 Expectation value (E-value)
    # 12 Bit score
    # 13 All subject Seq-id(s), separated by a ';'
    # 14 Raw score
    # 15 Number of identical matches
    # 16 Number of positive-scoring matches
    # 17 Total number of gaps
    # 18 Percentage of positive-scoring matches
    # 19 Query frame
    # 20 Subject frame
    # 21 Aligned part of query sequence
    # 22 Aligned part of subject sequence
    # 23 Query sequence length
    # 24 Subject sequence length
    # 25 All subject title(s), separated by a '<>'

    for line in blasttsv:
        line = line.strip("\n")

        # Blank lines (e.g. a trailing newline) and "#" comment lines carry
        # no hit data; skip them instead of failing on the column lookups.
        if not line.strip() or line.startswith("#"):
            continue

        data = line.split("\t")
        identical = float(data[14])
        combined_length = float(data[22]) + float(data[23])

        # A zero combined length would divide by zero; score such rows as 0.
        dice = 2 * identical / combined_length if combined_length else 0.0

        if dice >= min_dice:
            yield line
48
49
if __name__ == "__main__":
    # Command-line entry point: read a Blast TSV file and print the rows that
    # pass the Dice score threshold to standard output.
    parser = argparse.ArgumentParser(
        description="Filter Blast TSV hits by minimum Dice score"
    )
    parser.add_argument(
        "blasttsv", type=argparse.FileType("r"), help="Blast TSV Output"
    )
    parser.add_argument(
        "--min_dice", type=float, help="Minimum dice score", default=0.5
    )
    args = parser.parse_args()

    # Pass the arguments explicitly rather than via **vars(args), so adding a
    # CLI option later cannot break the call; close the input when finished.
    with args.blasttsv as handle:
        for line in blasttsv2gff3(handle, min_dice=args.min_dice):
            print(line)