Mercurial > repos > iuc > progressivemauve
annotate xmfa2gff3.py @ 4:4d869208bd52 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 525bd8ae422c82ea727907e4d7ce7461d196611f"
author | iuc |
---|---|
date | Wed, 13 Oct 2021 11:56:19 +0000 |
parents | 97a43bcbf44d |
children |
rev | line source |
---|---|
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
2 import argparse |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
3 import logging |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
4 import sys |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
5 |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
6 from BCBio import GFF |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
7 from Bio import SeqIO |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
8 from Bio.Seq import Seq |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
9 from Bio.SeqFeature import ( |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
10 FeatureLocation, |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
11 SeqFeature |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
12 ) |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
13 from Bio.SeqRecord import SeqRecord |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
14 |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
15 logging.basicConfig(level=logging.INFO) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
16 log = logging.getLogger(__name__) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
17 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
18 |
3
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
19 # Patch bcbio gff to work around url encoding issue. This is clearly |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
20 # sub-optimal but we should transition to the newer library. |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
21 def _new_format_keyvals(self, keyvals): |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
22 return ";".join(["%s=%s" % (k, ",".join(v)) for (k, v) in sorted(keyvals.items())]) |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
23 |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
24 |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
25 GFF.GFFOutput.GFF3Writer._format_keyvals = _new_format_keyvals |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
26 |
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
27 |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
def parse_xmfa(xmfa):
    """Simple XMFA parser, a stop-gap until Biopython provides one
    (https://github.com/biopython/biopython/pull/544).

    Reads ``xmfa`` line by line and yields one list per alignment block.
    Every element of that list is a dict describing one sequence entry:

    - ``rid``: all header fields after the ``>`` joined with ``_``
    - ``id``: the part before ``:`` in the ``<id>:<start>-<end>`` field
    - ``start`` / ``end``: the coordinates of that field as integers
    - ``strand``: ``1`` when the strand field is ``+``, otherwise ``-1``
    - ``seq``: the entry's sequence lines concatenated (gaps are kept)

    :param xmfa: iterable of text lines, e.g. an open file handle.
    :return: generator of lists of sequence dicts, one list per block.
    """
    current_lcb = []
    current_seq = {}
    # Iterate the handle directly instead of readlines() so that large
    # alignments are not loaded into memory in one go.
    for raw_line in xmfa:
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        # Blank lines carry no data; without this guard a blank line seen
        # before any header would fail on the missing 'seq' key.
        if not line:
            continue

        if line == '=':
            # End of block: flush the pending entry, then emit the block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            # A new header closes the previous entry of the same block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
            data = line.split()
            seq_id, loc = data[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(data[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if data[2] == '+' else -1,
                'seq': '',
            }
        else:
            current_seq['seq'] += line

    # Do not silently drop a final block that lacks its '=' terminator.
    if 'id' in current_seq:
        current_lcb.append(current_seq)
    if current_lcb:
        yield current_lcb
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
62 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
63 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
64 def _percent_identity(a, b): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
65 """Calculate % identity, ignoring gaps in the host sequence |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
66 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
67 match = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
68 mismatch = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
69 for char_a, char_b in zip(list(a), list(b)): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
70 if char_a == '-': |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
71 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
72 if char_a == char_b: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
73 match += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
74 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
75 mismatch += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
76 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
77 if match + mismatch == 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
78 return 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
79 return 100 * float(match) / (match + mismatch) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
80 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
81 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
82 def _id_tn_dict(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
83 """Figure out sequence IDs |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
84 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
85 label_convert = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
86 if sequences is not None: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
87 if len(sequences) == 1: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
88 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
89 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
90 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
91 for i, sequence in enumerate(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
92 for record in SeqIO.parse(sequence, 'fasta'): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
93 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
94 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
95 return label_convert |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
96 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
97 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
98 def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
99 label_convert = _id_tn_dict(sequences) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
100 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
101 lcbs = parse_xmfa(xmfa_file) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
102 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
103 records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
104 for lcb in lcbs: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
105 ids = [seq['id'] for seq in lcb] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
106 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
107 # Doesn't match part of our sequence |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
108 if relative_to not in ids: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
109 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
110 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
111 # Skip sequences that are JUST our "relative_to" genome |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
112 if len(ids) == 1: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
113 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
114 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
115 parent = [seq for seq in lcb if seq['id'] == relative_to][0] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
116 others = [seq for seq in lcb if seq['id'] != relative_to] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
117 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
118 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
119 other['feature'] = SeqFeature( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
120 FeatureLocation(parent['start'], parent['end'] + 1), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
121 type="match", strand=parent['strand'], |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
122 qualifiers={ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
123 "source": "progressiveMauve", |
3
97a43bcbf44d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents:
1
diff
changeset
|
124 "Target": " ".join(map(str, [label_convert.get(other['id'], other['id']), other['start'], other['end'], '+' if other['strand'] > 0 else '-'])), |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
125 "ID": label_convert.get(other['id'], 'xmfa_' + other['rid']) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
126 } |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
127 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
128 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
129 for i in range(0, len(lcb[0]['seq']), window_size): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
130 block_seq = parent['seq'][i:i + window_size] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
131 real_window_size = len(block_seq) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
132 real_start = abs(parent['start']) - parent['seq'][0:i].count('-') + i |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
133 real_end = real_start + real_window_size - block_seq.count('-') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
134 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
135 if (real_end - real_start) < 10: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
136 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
137 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
138 if parent['start'] < 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
139 strand = -1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
140 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
141 strand = 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
142 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
143 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
144 pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size]) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
145 # Ignore 0% identity sequences |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
146 if pid == 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
147 continue |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
148 |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
149 # Support for Biopython 1.68 and above, which removed sub_features |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
150 if not hasattr(other['feature'], "sub_features"): |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
151 other['feature'].sub_features = [] |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
152 other['feature'].sub_features.append( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
153 SeqFeature( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
154 FeatureLocation(real_start, real_end), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
155 type="match_part", strand=strand, |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
156 qualifiers={ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
157 "source": "progressiveMauve", |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
158 'score': pid |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
159 } |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
160 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
161 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
162 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
163 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
164 records[0].features.append(other['feature']) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
165 return records |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
166 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
167 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Script entry point: read the command line, run the XMFA -> GFF3
    # conversion, and emit the resulting records on standard output.
    cli = argparse.ArgumentParser(
        prog='xmfa2gff3',
        description='Convert XMFA alignments to gff3',
    )

    # Positional input: the XMFA alignment, opened for reading by argparse.
    cli.add_argument(
        'xmfa_file',
        help='XMFA File',
        type=argparse.FileType('r'),
    )
    cli.add_argument(
        '--window_size',
        default=1000,
        help='Window size for analysis',
        type=int,
    )
    # Kept as a string here; any conversion is left to the converter.
    cli.add_argument(
        '--relative_to',
        default='1',
        help='Index of the parent sequence in the MSA',
        type=str,
    )
    cli.add_argument(
        '--sequences',
        nargs='+',
        help='Fasta files (in same order) passed to parent for reconstructing proper IDs',
        type=argparse.FileType('r'),
    )
    cli.add_argument(
        '--version',
        action='version',
        version='%(prog)s 1.0',
    )

    cli_options = cli.parse_args()

    # The parsed namespace is forwarded by name, so the option names above
    # must match the keyword parameters accepted by convert_xmfa_to_gff3.
    gff_records = convert_xmfa_to_gff3(**vars(cli_options))
    GFF.write(gff_records, sys.stdout)