Mercurial > repos > iuc > progressivemauve
annotate xmfa2gff3.py @ 2:bdb752f3c6bb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 7e386c24c2f9f0293abd4877eff708c1fa285751
author | iuc |
---|---|
date | Thu, 07 Feb 2019 05:24:24 -0500 |
parents | bca52822843e |
children | 97a43bcbf44d |
rev | line source |
---|---|
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
2 import argparse |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
3 import logging |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
4 import sys |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
5 |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
6 from BCBio import GFF |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
7 from Bio import SeqIO |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
8 from Bio.Seq import Seq |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
9 from Bio.SeqFeature import ( |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
10 FeatureLocation, |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
11 SeqFeature |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
12 ) |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
13 from Bio.SeqRecord import SeqRecord |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
14 |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
15 logging.basicConfig(level=logging.INFO) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
16 log = logging.getLogger(__name__) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
17 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
18 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
19 def parse_xmfa(xmfa): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
20 """Simple XMFA parser until https://github.com/biopython/biopython/pull/544 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
21 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
22 current_lcb = [] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
23 current_seq = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
24 for line in xmfa.readlines(): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
25 if line.startswith('#'): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
26 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
27 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
28 if line.strip() == '=': |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
29 if 'id' in current_seq: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
30 current_lcb.append(current_seq) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
31 current_seq = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
32 yield current_lcb |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
33 current_lcb = [] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
34 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
35 line = line.strip() |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
36 if line.startswith('>'): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
37 if 'id' in current_seq: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
38 current_lcb.append(current_seq) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
39 current_seq = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
40 data = line.strip().split() |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
41 id, loc = data[1].split(':') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
42 start, end = loc.split('-') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
43 current_seq = { |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
44 'rid': '_'.join(data[1:]), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
45 'id': id, |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
46 'start': int(start), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
47 'end': int(end), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
48 'strand': 1 if data[2] == '+' else -1, |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
49 'seq': '' |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
50 } |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
51 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
52 current_seq['seq'] += line.strip() |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
53 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
54 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
55 def _percent_identity(a, b): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
56 """Calculate % identity, ignoring gaps in the host sequence |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
57 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
58 match = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
59 mismatch = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
60 for char_a, char_b in zip(list(a), list(b)): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
61 if char_a == '-': |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
62 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
63 if char_a == char_b: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
64 match += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
65 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
66 mismatch += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
67 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
68 if match + mismatch == 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
69 return 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
70 return 100 * float(match) / (match + mismatch) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
71 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
72 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
73 def _id_tn_dict(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
74 """Figure out sequence IDs |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
75 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
76 label_convert = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
77 if sequences is not None: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
78 if len(sequences) == 1: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
79 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
80 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
81 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
82 for i, sequence in enumerate(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
83 for record in SeqIO.parse(sequence, 'fasta'): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
84 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
85 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
86 return label_convert |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
87 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
88 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
89 def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
90 label_convert = _id_tn_dict(sequences) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
91 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
92 lcbs = parse_xmfa(xmfa_file) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
93 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
94 records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
95 for lcb in lcbs: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
96 ids = [seq['id'] for seq in lcb] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
97 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
98 # Doesn't match part of our sequence |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
99 if relative_to not in ids: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
100 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
101 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
102 # Skip sequences that are JUST our "relative_to" genome |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
103 if len(ids) == 1: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
104 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
105 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
106 parent = [seq for seq in lcb if seq['id'] == relative_to][0] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
107 others = [seq for seq in lcb if seq['id'] != relative_to] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
108 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
109 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
110 other['feature'] = SeqFeature( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
111 FeatureLocation(parent['start'], parent['end'] + 1), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
112 type="match", strand=parent['strand'], |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
113 qualifiers={ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
114 "source": "progressiveMauve", |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
115 "target": label_convert.get(other['id'], other['id']), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
116 "ID": label_convert.get(other['id'], 'xmfa_' + other['rid']) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
117 } |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
118 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
119 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
120 for i in range(0, len(lcb[0]['seq']), window_size): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
121 block_seq = parent['seq'][i:i + window_size] |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
122 real_window_size = len(block_seq) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
123 real_start = abs(parent['start']) - parent['seq'][0:i].count('-') + i |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
124 real_end = real_start + real_window_size - block_seq.count('-') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
125 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
126 if (real_end - real_start) < 10: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
127 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
128 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
129 if parent['start'] < 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
130 strand = -1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
131 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
132 strand = 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
133 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
134 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
135 pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size]) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
136 # Ignore 0% identity sequences |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
137 if pid == 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
138 continue |
1
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
139 |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
140 # Support for Biopython 1.68 and above, which removed sub_features |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
141 if not hasattr(other['feature'], "sub_features"): |
bca52822843e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
0
diff
changeset
|
142 other['feature'].sub_features = [] |
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
143 other['feature'].sub_features.append( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
144 SeqFeature( |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
145 FeatureLocation(real_start, real_end), |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
146 type="match_part", strand=strand, |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
147 qualifiers={ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
148 "source": "progressiveMauve", |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
149 'score': pid |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
150 } |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
151 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
152 ) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
153 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
154 for other in others: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
155 records[0].features.append(other['feature']) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
156 return records |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
157 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
158 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
159 if __name__ == '__main__': |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
160 parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
161 parser.add_argument('xmfa_file', type=file, help='XMFA File') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
162 parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
163 parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
164 parser.add_argument('--sequences', type=file, nargs='+', |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
165 help='Fasta files (in same order) passed to parent for reconstructing proper IDs') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
166 parser.add_argument('--version', action='version', version='%(prog)s 1.0') |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
167 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
168 args = parser.parse_args() |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
169 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
170 result = convert_xmfa_to_gff3(**vars(args)) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
171 GFF.write(result, sys.stdout) |