annotate blastxml_to_gapped_gff3.py @ 2:561e827baa5f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 908f16ea4eb082227437dc93e06e8cb742f5a257
author iuc
date Wed, 15 Nov 2017 15:14:58 -0500
parents 877cd0833221
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
877cd0833221 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 014e89419c7304a50d78e3a3bfcf46f3e174fd7c
iuc
parents: 0
diff changeset
1 #!/usr/bin/env python
0
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
2 import argparse
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
3 import copy
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
4 import logging
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
5 import re
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
6 import sys
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
7
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
8 from BCBio import GFF
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
# Configure the root logger at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# Named logger for this module.
log = logging.getLogger(name='blastxml2gff3')

# Assigned explicitly (rather than written as a leading string literal)
# because it appears after the imports; the text doubles as the tool summary.
__doc__ = """
BlastXML files, when transformed to GFF3, do not normally show gaps in the
blast hits. This tool aims to fill that "gap".
"""
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
16
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
17
2
561e827baa5f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents: 1
diff changeset
def blastxml2gff3(blastxml, min_gap=3, trim=False, trim_end=False, include_seq=False):
    """Convert BLAST XML into gapped GFF3-style records.

    This is a generator: one ``SeqRecord`` is yielded per BLAST record
    (query). Every HSP of every hit becomes a top-level feature (typed
    ``nucleotide_match``, ``protein_match`` or ``match`` depending on
    ``record.application``) holding one ``match_part`` sub-feature per part
    produced by ``generate_parts``.

    :param blastxml: input passed straight to ``NCBIXML.parse``.
    :param min_gap: forwarded to ``generate_parts`` as ``ignore_under``.
    :param trim: when true, a parent start below 1 is clamped to 0 and the
        parent end is clamped as described for ``trim_end``.
    :param trim_end: when true, a parent end beyond ``hsp.query_end`` is
        clamped to ``hsp.query_end + 1``.
    :param include_seq: when true, the aligned query / subject / match
        strings are stored as ``blast_qseq`` / ``blast_sseq`` /
        ``blast_mseq`` qualifiers.
    :return: generator of ``SeqRecord`` objects with their features attached.
    """
    from Bio.Blast import NCBIXML
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    # HSP attributes copied verbatim into ``blast_<name>`` qualifiers.
    hsp_props = ('score', 'bits', 'identities', 'positives',
                 'gaps', 'align_length', 'strand', 'frame',
                 'query_start', 'query_end', 'sbjct_start',
                 'sbjct_end')

    blast_records = NCBIXML.parse(blastxml)
    for idx_record, record in enumerate(blast_records):
        # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343
        match_type = {  # Currently we can only handle BLASTN, BLASTP
            'BLASTN': 'nucleotide_match',
            'BLASTP': 'protein_match',
        }.get(record.application, 'match')

        # Only the text before the first space of the query line is the ID.
        recid = record.query
        if ' ' in recid:
            recid = recid[0:recid.index(' ')]

        # The sequence itself is a placeholder; only the features matter.
        rec = SeqRecord(Seq("ACTG"), id=recid)
        for idx_hit, hit in enumerate(record.alignments):
            # Merged deflines are separated by ' >'; split once per hit.
            hit_titles = hit.title.split(' >')
            for idx_hsp, hsp in enumerate(hit.hsps):
                qualifiers = {
                    "ID": 'b2g.%s.%s.%s' % (idx_record, idx_hit, idx_hsp),
                    "source": "blast",
                    "score": hsp.expect,
                    "accession": hit.accession,
                    "hit_id": hit.hit_id,
                    "length": hit.length,
                    "hit_titles": hit_titles,
                }
                if include_seq:
                    qualifiers.update({
                        'blast_qseq': hsp.query,
                        'blast_sseq': hsp.sbjct,
                        'blast_mseq': hsp.match,
                    })

                for prop in hsp_props:
                    qualifiers['blast_' + prop] = getattr(hsp, prop, None)

                # The description is everything from the first space of the
                # first title onwards. A title without any space (ID only)
                # used to raise ValueError via str.index; fall back to the
                # whole title instead of aborting the conversion.
                desc = hit_titles[0]
                space_at = desc.find(' ')
                qualifiers['description'] = desc[space_at:] if space_at != -1 else desc

                # This required a fair bit of sketching out/math to figure
                # out the first time.
                #
                # The match_start location must account for queries and
                # subjects that start at locations other than 1.
                parent_match_start = hsp.query_start - hsp.sbjct_start
                # The end is the start + hit.length because the match itself
                # may be longer than the parent feature, so we use the supplied
                # subject/hit length to calculate the real ending of the target
                # protein.
                parent_match_end = hsp.query_start + hit.length + hsp.query.count('-')

                # If we trim the left end, we need to trim without losing
                # information: the untrimmed start is still needed below to
                # position the match_parts.
                used_parent_match_start = parent_match_start
                if trim and parent_match_start < 1:
                    used_parent_match_start = 0

                if (trim or trim_end) and parent_match_end > hsp.query_end:
                    parent_match_end = hsp.query_end + 1

                # The ``match`` feature will hold one or more ``match_part``s
                top_feature = SeqFeature(
                    FeatureLocation(used_parent_match_start, parent_match_end),
                    type=match_type, strand=0,
                    qualifiers=qualifiers
                )

                # Unlike the parent feature, ``match_part``s have sources.
                part_qualifiers = {
                    "source": "blast",
                }
                top_feature.sub_features = []
                parts = generate_parts(hsp.query, hsp.match, hsp.sbjct,
                                       ignore_under=min_gap)
                for idx_part, (start, end, cigar) in enumerate(parts):
                    part_qualifiers['Gap'] = cigar
                    part_qualifiers['ID'] = qualifiers['ID'] + ('.%s' % idx_part)

                    # Otherwise, we have to account for the subject start's location
                    match_part_start = parent_match_start + hsp.sbjct_start + start - 1

                    # We used to use hsp.align_length here, but that includes
                    # gaps in the parent sequence
                    #
                    # Furthermore align_length will give calculation errors in weird places
                    # So we just use (end-start) for simplicity
                    match_part_end = match_part_start + (end - start)

                    # part_qualifiers is reused across iterations, so each
                    # sub-feature gets its own deep copy.
                    top_feature.sub_features.append(
                        SeqFeature(
                            FeatureLocation(match_part_start, match_part_end),
                            type="match_part", strand=0,
                            qualifiers=copy.deepcopy(part_qualifiers))
                    )

                rec.features.append(top_feature)
        rec.annotations = {}
        yield rec
0
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
125
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
126
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
def __remove_query_gaps(query, match, subject):
    """Drop every alignment column where the query has a gap.

    Each position at which ``query`` holds a ``-`` is removed from
    ``query``, ``match`` and ``subject`` alike, so that the remaining
    column indexes line up with ungapped query positions::

        Q:ACTG-ACTGACTG
        S:ACTGAAC---CTG

    becomes::

        Q:ACTGACTGACTG
        S:ACTGAC---CTG

    Gaps in the subject are left untouched.  Returns the tuple
    ``(query, match, subject)`` of filtered strings.
    """
    # Indexes of all gap characters in the query; membership tests on the set
    # are cheap, so each string is filtered in a single pass.
    gap_columns = set(hit.start() for hit in re.finditer('-', query))

    def without_gap_columns(sequence):
        return ''.join(
            char for column, char in enumerate(sequence)
            if column not in gap_columns
        )

    return (
        without_gap_columns(query),
        without_gap_columns(match),
        without_gap_columns(subject),
    )
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
158
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
159
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
def generate_parts(query, match, subject, ignore_under=3):
    """Split one alignment into parts separated by long mismatch runs.

    Query gaps are removed from all three strings first (see
    ``__remove_query_gaps``), so every index below refers to a column of
    the ungapped query.  The columns are then scanned left to right: a
    column whose midline character is anything other than a space counts
    as a match, and a run of ``ignore_under`` or more consecutive space
    columns closes the part that is currently open.  Shorter runs stay
    inside the part.

    query, match, subject -- the aligned query, midline and subject strings
    ignore_under -- number of consecutive non-matching columns that ends a
        part (default 3)

    Yields ``(start, end, cigar)`` tuples where ``start`` is the 0-based
    index of the first matching column of the part, ``end`` is one past
    its last matching column, and ``cigar`` is the result of
    ``cigar_from_string(..., strict_m=True)`` over exactly the columns
    ``[start, end)``.  Nothing is yielded for input without any matching
    column.
    """
    (query, match, subject) = __remove_query_gaps(query, match, subject)

    def build_part(start, end):
        # Slice all three strings with the same bounds so the CIGAR string
        # describes exactly the columns reported by (start, end); trailing
        # mismatch columns after the last match are never part of it.
        return start, end, cigar_from_string(
            query[start:end], match[start:end], subject[start:end],
            strict_m=True)

    region_start = -1  # -1 means "no part is currently open"
    region_end = -1
    mismatch_count = 0
    # zip() bounds the scan to the shortest of the three strings.
    for i, (_q, m, _s) in enumerate(zip(query, match, subject)):
        if m != ' ':
            if region_start == -1:
                region_start = i
            region_end = i
            mismatch_count = 0
        else:
            mismatch_count += 1

        # Only an open part can be closed; mismatches seen before the first
        # match (or between parts) are simply skipped.
        if region_start != -1 and mismatch_count >= ignore_under:
            yield build_part(region_start, region_end + 1)
            region_start = -1
            region_end = -1
            mismatch_count = 0

    # Flush the part still open at the end of the alignment.  Without the
    # guard a (-1, 0, ...) placeholder would be emitted whenever the
    # alignment is empty or ends in a long mismatch run.
    if region_start != -1:
        yield build_part(region_start, region_end + 1)
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
206
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
207
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
def _qms_to_matches(query, match, subject, strict_m=True):
    """Classify every alignment column as a one-letter operation.

    The three inputs are walked in lockstep (stopping at the shortest) and
    each column is mapped as follows:

    - midline character is not a space -> ``=``
    - midline is a space and the query character is ``-`` -> ``D``
    - midline is a space and the subject character is ``-`` -> ``I``
    - midline is a space otherwise -> ``X``

    When ``strict_m`` is true, ``=`` and ``X`` are both reported as ``M``.

    Returns a list with one operation character per column.
    """
    matchline = []

    for (q, m, s) in zip(query, match, subject):
        # A midline character is either a space or it is not, so these four
        # branches are exhaustive; no "bad data" fallback can ever trigger.
        if m != ' ':
            op = '='
        elif q == '-':
            op = 'D'
        elif s == '-':
            op = 'I'
        else:
            op = 'X'

        if strict_m and op in ('=', 'X'):
            op = 'M'

        matchline.append(op)
    return matchline
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
232
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
233
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
def _matchline_to_cigar(matchline):
    """Run-length encode a sequence of operation characters.

    Consecutive equal entries of ``matchline`` are collapsed into
    ``<operation><count>`` tokens, which are joined with single spaces,
    e.g. ``['M', 'M', 'M', 'D', 'D', 'M']`` gives ``'M3 D2 M1'``.

    An empty ``matchline`` gives an empty string.
    """
    if not matchline:
        # Nothing to encode; avoids indexing into an empty sequence.
        return ''

    runs = []  # [operation, length] pairs in order of appearance
    for op in matchline:
        if runs and runs[-1][0] == op:
            runs[-1][1] += 1
        else:
            runs.append([op, 1])
    return ' '.join("%s%s" % (op, length) for op, length in runs)
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
247
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
248
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
def cigar_from_string(query, match, subject, strict_m=True):
    """Build the run-length encoded operation string for an alignment.

    The columns are classified by ``_qms_to_matches`` (``strict_m`` is
    forwarded unchanged) and the resulting operations are encoded by
    ``_matchline_to_cigar``.  An alignment with no columns gives ``""``.
    """
    operations = _qms_to_matches(query, match, subject, strict_m=strict_m)
    if not operations:
        return ""
    return _matchline_to_cigar(operations)
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
255
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
256
bd47051afe98 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blastxml_to_gapped_gff3 commit 8f38145c94ecb1e23c3ff6f0243213dc49d2287e
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options, hand them to
    # blastxml2gff3() as keyword arguments and write every record that has
    # at least one feature to standard output.
    parser = argparse.ArgumentParser(description='Convert Blast XML to gapped GFF3', epilog='')
    parser.add_argument('blastxml', type=argparse.FileType("r"), help='Blast XML Output')
    parser.add_argument('--min_gap', type=int, help='Maximum gap size before generating a new match_part', default=3)

    # The remaining options are plain on/off switches.
    for flag, help_text in (
        ('--trim', 'Trim blast hits to be only as long as the parent feature'),
        ('--trim_end', 'Cut blast results off at end of gene'),
        ('--include_seq', 'Include sequence'),
    ):
        parser.add_argument(flag, action='store_true', help=help_text)

    args = parser.parse_args()

    for rec in blastxml2gff3(**vars(args)):
        # Records without features are not written.
        if len(rec.features) == 0:
            continue
        GFF.write([rec], sys.stdout)