Mercurial > repos > iuc > progressivemauve
annotate xmfa2gff3.py @ 0:74093fb62bdf draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
author | iuc |
---|---|
date | Wed, 17 Aug 2016 14:46:55 -0400 |
parents | |
children | bca52822843e |
rev | line source |
---|---|
0
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
2 import sys |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
3 from Bio import SeqIO |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
4 from Bio.Seq import Seq |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
5 from Bio.SeqRecord import SeqRecord |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
6 from Bio.SeqFeature import SeqFeature, FeatureLocation |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
7 import argparse |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
8 from BCBio import GFF |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
9 import logging |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
10 logging.basicConfig(level=logging.INFO) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
11 log = logging.getLogger(__name__) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
12 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
13 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
def parse_xmfa(xmfa):
    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544

    Iterates over the lines of ``xmfa`` and yields one list per locally
    collinear block (LCB). A block is terminated by a line consisting of a
    single ``=``; entries collected after the last ``=`` are not yielded.

    Each entry of a yielded list is a dict built from a header line of the
    form ``> <id>:<start>-<end> <strand> <anything else>`` with the keys:

    * ``rid``    - every header token after ``>`` joined with ``_``
    * ``id``     - the text before ``:`` in the second token
    * ``start``  - int, text between ``:`` and ``-``
    * ``end``    - int, text after ``-``
    * ``strand`` - ``1`` when the third token is ``+``, otherwise ``-1``
    * ``seq``    - the following non-header lines, stripped and concatenated

    Lines starting with ``#`` are ignored, as are blank lines.

    :param xmfa: iterable of text lines, e.g. an open file handle
    :raises KeyError: if sequence data appears before any header line
    :raises ValueError: if a header's ``id:start-end`` token is malformed
    """
    current_lcb = []
    current_seq = {}
    # Iterate lazily instead of loading the whole alignment with readlines().
    for raw_line in xmfa:
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        if not line:
            # Blank lines carry no data. Skipping them also avoids a KeyError
            # when one appears before the first header or right after '='.
            continue

        if line == '=':
            # End of block: flush the pending entry, then emit the block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            # A new header closes the previous entry of the same block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            data = line.split()
            seq_id, loc = data[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(data[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if data[2] == '+' else -1,
                'seq': ''
            }
        else:
            current_seq['seq'] += line
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
48 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
49 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
50 def _percent_identity(a, b): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
51 """Calculate % identity, ignoring gaps in the host sequence |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
52 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
53 match = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
54 mismatch = 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
55 for char_a, char_b in zip(list(a), list(b)): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
56 if char_a == '-': |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
57 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
58 if char_a == char_b: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
59 match += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
60 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
61 mismatch += 1 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
62 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
63 if match + mismatch == 0: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
64 return 0 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
65 return 100 * float(match) / (match + mismatch) |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
66 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
67 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
68 def _id_tn_dict(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
69 """Figure out sequence IDs |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
70 """ |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
71 label_convert = {} |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
72 if sequences is not None: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
73 if len(sequences) == 1: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
74 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
75 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
76 else: |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
77 for i, sequence in enumerate(sequences): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
78 for record in SeqIO.parse(sequence, 'fasta'): |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
79 label_convert[str(i + 1)] = record.id |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
80 continue |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
81 return label_convert |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
82 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
83 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
    """Turn the LCBs of an XMFA alignment into features on one reference.

    Every block returned by ``parse_xmfa`` that contains the ``relative_to``
    sequence and at least one other entry yields, per other entry, one
    ``match`` feature spanning the reference interval of the block. The
    reference alignment row is then cut into windows of ``window_size``
    columns; each window with at least 10 ungapped reference positions and
    a non-zero percent identity adds a ``match_part`` sub-feature whose
    ``score`` qualifier is that identity.

    :param xmfa_file: line source handed to ``parse_xmfa``
    :param relative_to: id (as it appears in the XMFA headers) of the
        sequence used as coordinate reference
    :param sequences: optional FASTA inputs handed to ``_id_tn_dict`` to
        translate XMFA ids into record ids
    :param window_size: number of alignment columns per ``match_part``
    :return: list holding a single ``SeqRecord`` (placeholder sequence "A")
        carrying all generated features
    """
    label_convert = _id_tn_dict(sequences)

    lcbs = parse_xmfa(xmfa_file)

    records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
    for lcb in lcbs:
        ids = [seq['id'] for seq in lcb]

        # Doesn't match part of our sequence
        if relative_to not in ids:
            continue

        # Skip sequences that are JUST our "relative_to" genome
        if len(ids) == 1:
            continue

        parent = [seq for seq in lcb if seq['id'] == relative_to][0]
        others = [seq for seq in lcb if seq['id'] != relative_to]

        for other in others:
            other['feature'] = SeqFeature(
                FeatureLocation(parent['start'], parent['end'] + 1),
                type="match", strand=parent['strand'],
                qualifiers={
                    "source": "progressiveMauve",
                    "target": label_convert.get(other['id'], other['id']),
                    "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
                }
            )

        # Loop-invariant, so computed once per block.
        # NOTE(review): parse_xmfa appears unable to produce a negative
        # start, so this looks like it is always 1 even when
        # parent['strand'] is -1 - confirm whether parent['strand'] was meant.
        if parent['start'] < 0:
            strand = -1
        else:
            strand = 1

        parent_seq = parent['seq']
        # Gap columns seen in the reference row before the current window;
        # kept as a running total instead of recounting the prefix each time.
        gaps_before = 0
        # Windows are driven by the reference row, the same row they slice.
        for i in range(0, len(parent_seq), window_size):
            block_seq = parent_seq[i:i + window_size]
            real_window_size = len(block_seq)
            block_gaps = block_seq.count('-')
            # Alignment column -> reference coordinate: drop gap columns.
            real_start = abs(parent['start']) - gaps_before + i
            real_end = real_start + real_window_size - block_gaps
            gaps_before += block_gaps

            # Too few reference bases in this window to report.
            if (real_end - real_start) < 10:
                continue

            for other in others:
                pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
                # Ignore 0% identity sequences
                if pid == 0:
                    continue
                other['feature'].sub_features.append(
                    SeqFeature(
                        FeatureLocation(real_start, real_end),
                        type="match_part", strand=strand,
                        qualifiers={
                            "source": "progressiveMauve",
                            'score': pid
                        }
                    )
                )

        for other in others:
            records[0].features.append(other['feature'])
    return records
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
148 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
149 |
74093fb62bdf
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, run the XMFA -> GFF3
    # conversion and write the resulting records to standard output.
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    # argparse.FileType('r') replaces the Python 2-only ``file`` builtin, which
    # raises NameError on Python 3. The argument is still delivered to the
    # converter as an open, readable file object.
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    # Kept as a string (default '1'), exactly as the original interface did.
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    # Every parsed option is forwarded as a keyword argument, so the argument
    # names above must match the parameter names of convert_xmfa_to_gff3.
    result = convert_xmfa_to_gff3(**vars(args))
    # NOTE(review): ``GFF`` is not among the imports visible at the top of the
    # file; presumably it is imported further down (e.g. from BCBio) - confirm.
    GFF.write(result, sys.stdout)