Mercurial > repos > iuc > jbrowse
annotate blastxml_to_gapped_gff3.py @ 31:2bb2e07a7a21 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 12dff0abf006f6c81f6462cdf4ea9c0c740d1e9c
author | iuc |
---|---|
date | Mon, 15 Apr 2019 10:31:26 -0400 |
parents | ff11d442feed |
children |
rev | line source |
---|---|
14
18be2d72fdee
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0bfb0de98c918860c21808e8832caad9f0535975
iuc
parents:
12
diff
changeset
|
1 #!/usr/bin/env python |
8
ad4b9d7eae6a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
iuc
parents:
3
diff
changeset
|
2 import argparse |
ad4b9d7eae6a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
iuc
parents:
3
diff
changeset
|
3 import copy |
ad4b9d7eae6a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
iuc
parents:
3
diff
changeset
|
4 import logging |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
5 import re |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
6 import sys |
8
ad4b9d7eae6a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
iuc
parents:
3
diff
changeset
|
7 |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
8 from BCBio import GFF |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
9 logging.basicConfig(level=logging.INFO) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
10 log = logging.getLogger(name='blastxml2gff3') |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
11 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
12 __doc__ = """ |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
13 BlastXML files, when transformed to GFF3, do not normally show gaps in the |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
14 blast hits. This tool aims to fill that "gap". |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
15 """ |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
16 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
17 |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
18 def blastxml2gff3(blastxml, min_gap=3, trim=False, trim_end=False, include_seq=False): |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
19 from Bio.Blast import NCBIXML |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
20 from Bio.Seq import Seq |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
21 from Bio.SeqRecord import SeqRecord |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
22 from Bio.SeqFeature import SeqFeature, FeatureLocation |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
23 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
24 blast_records = NCBIXML.parse(blastxml) |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
25 for idx_record, record in enumerate(blast_records): |
3 | 26 # http://www.sequenceontology.org/browser/release_2.4/term/SO:0000343 |
27 match_type = { # Currently we can only handle BLASTN, BLASTP | |
28 'BLASTN': 'nucleotide_match', | |
29 'BLASTP': 'protein_match', | |
30 }.get(record.application, 'match') | |
31 | |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
32 recid = record.query |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
33 if ' ' in recid: |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
34 recid = recid[0:recid.index(' ')] |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
35 |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
36 rec = SeqRecord(Seq("ACTG"), id=recid) |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
37 for idx_hit, hit in enumerate(record.alignments): |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
38 for idx_hsp, hsp in enumerate(hit.hsps): |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
39 qualifiers = { |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
40 "ID": 'b2g.%s.%s.%s' % (idx_record, idx_hit, idx_hsp), |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
41 "source": "blast", |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
42 "score": hsp.expect, |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
43 "accession": hit.accession, |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
44 "hit_id": hit.hit_id, |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
45 "length": hit.length, |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
46 "hit_titles": hit.title.split(' >'), |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
47 } |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
48 if include_seq: |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
49 qualifiers.update({ |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
50 'blast_qseq': hsp.query, |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
51 'blast_sseq': hsp.sbjct, |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
52 'blast_mseq': hsp.match, |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
53 }) |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
54 |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
55 for prop in ('score', 'bits', 'identities', 'positives', |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
56 'gaps', 'align_length', 'strand', 'frame', |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
57 'query_start', 'query_end', 'sbjct_start', |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
58 'sbjct_end'): |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
59 qualifiers['blast_' + prop] = getattr(hsp, prop, None) |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
60 |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
61 desc = hit.title.split(' >')[0] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
62 qualifiers['description'] = desc[desc.index(' '):] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
63 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
64 # This required a fair bit of sketching out/match to figure out |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
65 # the first time. |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
66 # |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
67 # the match_start location must account for queries and |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
68 # subjecst that start at locations other than 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
69 parent_match_start = hsp.query_start - hsp.sbjct_start |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
70 # The end is the start + hit.length because the match itself |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
71 # may be longer than the parent feature, so we use the supplied |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
72 # subject/hit length to calculate the real ending of the target |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
73 # protein. |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
74 parent_match_end = hsp.query_start + hit.length + hsp.query.count('-') |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
75 |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
76 # If we trim the left end, we need to trim without losing information. |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
77 used_parent_match_start = parent_match_start |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
78 if trim: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
79 if parent_match_start < 1: |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
80 used_parent_match_start = 0 |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
81 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
82 if trim or trim_end: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
83 if parent_match_end > hsp.query_end: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
84 parent_match_end = hsp.query_end + 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
85 |
3 | 86 # The ``match`` feature will hold one or more ``match_part``s |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
87 top_feature = SeqFeature( |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
88 FeatureLocation(used_parent_match_start, parent_match_end), |
3 | 89 type=match_type, strand=0, |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
90 qualifiers=qualifiers |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
91 ) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
92 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
93 # Unlike the parent feature, ``match_part``s have sources. |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
94 part_qualifiers = { |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
95 "source": "blast", |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
96 } |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
97 top_feature.sub_features = [] |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
98 for idx_part, (start, end, cigar) in \ |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
99 enumerate(generate_parts(hsp.query, hsp.match, |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
100 hsp.sbjct, |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
101 ignore_under=min_gap)): |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
102 part_qualifiers['Gap'] = cigar |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
103 part_qualifiers['ID'] = qualifiers['ID'] + ('.%s' % idx_part) |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
104 |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
105 # Otherwise, we have to account for the subject start's location |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
106 match_part_start = parent_match_start + hsp.sbjct_start + start - 1 |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
107 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
108 # We used to use hsp.align_length here, but that includes |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
109 # gaps in the parent sequence |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
110 # |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
111 # Furthermore align_length will give calculation errors in weird places |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
112 # So we just use (end-start) for simplicity |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
113 match_part_end = match_part_start + (end - start) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
114 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
115 top_feature.sub_features.append( |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
116 SeqFeature( |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
117 FeatureLocation(match_part_start, match_part_end), |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
118 type="match_part", strand=0, |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
119 qualifiers=copy.deepcopy(part_qualifiers)) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
120 ) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
121 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
122 rec.features.append(top_feature) |
3 | 123 rec.annotations = {} |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
124 yield rec |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
125 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
126 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
127 def __remove_query_gaps(query, match, subject): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
128 """remove positions in all three based on gaps in query |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
129 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
130 In order to simplify math and calculations...we remove all of the gaps |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
131 based on gap locations in the query sequence:: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
132 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
133 Q:ACTG-ACTGACTG |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
134 S:ACTGAAC---CTG |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
135 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
136 will become:: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
137 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
138 Q:ACTGACTGACTG |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
139 S:ACTGAC---CTG |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
140 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
141 which greatly simplifies the process of identifying the correct location |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
142 for a match_part |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
143 """ |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
144 prev = 0 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
145 fq = '' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
146 fm = '' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
147 fs = '' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
148 for position in re.finditer('-', query): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
149 fq += query[prev:position.start()] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
150 fm += match[prev:position.start()] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
151 fs += subject[prev:position.start()] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
152 prev = position.start() + 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
153 fq += query[prev:] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
154 fm += match[prev:] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
155 fs += subject[prev:] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
156 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
157 return (fq, fm, fs) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
158 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
159 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
160 def generate_parts(query, match, subject, ignore_under=3): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
161 region_q = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
162 region_m = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
163 region_s = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
164 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
165 (query, match, subject) = __remove_query_gaps(query, match, subject) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
166 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
167 region_start = -1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
168 region_end = -1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
169 mismatch_count = 0 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
170 for i, (q, m, s) in enumerate(zip(query, match, subject)): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
171 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
172 # If we have a match |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
173 if m != ' ' or m == '+': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
174 if region_start == -1: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
175 region_start = i |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
176 # It's a new region, we need to reset or it's pre-seeded with |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
177 # spaces |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
178 region_q = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
179 region_m = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
180 region_s = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
181 region_end = i |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
182 mismatch_count = 0 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
183 else: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
184 mismatch_count += 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
185 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
186 region_q.append(q) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
187 region_m.append(m) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
188 region_s.append(s) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
189 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
190 if mismatch_count >= ignore_under and region_start != -1 and region_end != -1: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
191 region_q = region_q[0:-ignore_under] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
192 region_m = region_m[0:-ignore_under] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
193 region_s = region_s[0:-ignore_under] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
194 yield region_start, region_end + 1, \ |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
195 cigar_from_string(region_q, region_m, region_s, strict_m=True) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
196 region_q = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
197 region_m = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
198 region_s = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
199 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
200 region_start = -1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
201 region_end = -1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
202 mismatch_count = 0 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
203 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
204 yield region_start, region_end + 1, \ |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
205 cigar_from_string(region_q, region_m, region_s, strict_m=True) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
206 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
207 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
208 def _qms_to_matches(query, match, subject, strict_m=True): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
209 matchline = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
210 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
211 for (q, m, s) in zip(query, match, subject): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
212 ret = '' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
213 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
214 if m != ' ' or m == '+': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
215 ret = '=' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
216 elif m == ' ': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
217 if q == '-': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
218 ret = 'D' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
219 elif s == '-': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
220 ret = 'I' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
221 else: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
222 ret = 'X' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
223 else: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
224 log.warn("Bad data: \n\t%s\n\t%s\n\t%s\n" % (query, match, subject)) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
225 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
226 if strict_m: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
227 if ret == '=' or ret == 'X': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
228 ret = 'M' |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
229 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
230 matchline.append(ret) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
231 return matchline |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
232 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
233 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
234 def _matchline_to_cigar(matchline): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
235 cigar_line = [] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
236 last_char = matchline[0] |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
237 count = 0 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
238 for char in matchline: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
239 if char == last_char: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
240 count += 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
241 else: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
242 cigar_line.append("%s%s" % (last_char, count)) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
243 count = 1 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
244 last_char = char |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
245 cigar_line.append("%s%s" % (last_char, count)) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
246 return ' '.join(cigar_line) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
247 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
248 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
249 def cigar_from_string(query, match, subject, strict_m=True): |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
250 matchline = _qms_to_matches(query, match, subject, strict_m=strict_m) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
251 if len(matchline) > 0: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
252 return _matchline_to_cigar(matchline) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
253 else: |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
254 return "" |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
255 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
256 |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
257 if __name__ == '__main__': |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
258 parser = argparse.ArgumentParser(description='Convert Blast XML to gapped GFF3', epilog='') |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
259 parser.add_argument('blastxml', type=argparse.FileType("r"), help='Blast XML Output') |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
260 parser.add_argument('--min_gap', type=int, help='Maximum gap size before generating a new match_part', default=3) |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
261 parser.add_argument('--trim', action='store_true', help='Trim blast hits to be only as long as the parent feature') |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
262 parser.add_argument('--trim_end', action='store_true', help='Cut blast results off at end of gene') |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
263 parser.add_argument('--include_seq', action='store_true', help='Include sequence') |
1
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
264 args = parser.parse_args() |
497c6bb3b717
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
iuc
parents:
diff
changeset
|
265 |
17
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
266 for rec in blastxml2gff3(**vars(args)): |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
267 if len(rec.features): |
ff11d442feed
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 908f16ea4eb082227437dc93e06e8cb742f5a257
iuc
parents:
14
diff
changeset
|
268 GFF.write([rec], sys.stdout) |