comparison jbrowse2/blastxml_to_gapped_gff3.py @ 7:234cf4490901 draft

Uploaded
author fubar
date Fri, 05 Jan 2024 04:31:35 +0000
parents 88b9b105c09b
children
comparison
equal deleted inserted replaced
6:88b9b105c09b 7:234cf4490901
30 "BLASTP": "protein_match", 30 "BLASTP": "protein_match",
31 }.get(record.application, "match") 31 }.get(record.application, "match")
32 32
33 recid = record.query 33 recid = record.query
34 if " " in recid: 34 if " " in recid:
35 recid = recid[0 : recid.index(" ")] 35 recid = recid[0: recid.index(" ")]
36 36
37 rec = SeqRecord(Seq("ACTG"), id=recid) 37 rec = SeqRecord(Seq("ACTG"), id=recid)
38 for idx_hit, hit in enumerate(record.alignments): 38 for idx_hit, hit in enumerate(record.alignments):
39 for idx_hsp, hsp in enumerate(hit.hsps): 39 for idx_hsp, hsp in enumerate(hit.hsps):
40 qualifiers = { 40 qualifiers = {
70 "sbjct_end", 70 "sbjct_end",
71 ): 71 ):
72 qualifiers["blast_" + prop] = getattr(hsp, prop, None) 72 qualifiers["blast_" + prop] = getattr(hsp, prop, None)
73 73
74 desc = hit.title.split(" >")[0] 74 desc = hit.title.split(" >")[0]
75 qualifiers["description"] = desc[desc.index(" ") :] 75 qualifiers["description"] = desc[desc.index(" "):]
76 76
77 # This required a fair bit of sketching out/match to figure out 77 # This required a fair bit of sketching out/match to figure out
78 # the first time. 78 # the first time.
79 # 79 #
80 # the match_start location must account for queries and 80 # the match_start location must account for queries and
159 prev = 0 159 prev = 0
160 fq = "" 160 fq = ""
161 fm = "" 161 fm = ""
162 fs = "" 162 fs = ""
163 for position in re.finditer("-", query): 163 for position in re.finditer("-", query):
164 fq += query[prev : position.start()] 164 fq += query[prev: position.start()]
165 fm += match[prev : position.start()] 165 fm += match[prev: position.start()]
166 fs += subject[prev : position.start()] 166 fs += subject[prev: position.start()]
167 prev = position.start() + 1 167 prev = position.start() + 1
168 fq += query[prev:] 168 fq += query[prev:]
169 fm += match[prev:] 169 fm += match[prev:]
170 fs += subject[prev:] 170 fs += subject[prev:]
171 171