annotate xmfa2gff3.py @ 0:74093fb62bdf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
author iuc
date Wed, 17 Aug 2016 14:46:55 -0400
parents
children bca52822843e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
1 #!/usr/bin/env python
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
2 import sys
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
3 from Bio import SeqIO
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
4 from Bio.Seq import Seq
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
5 from Bio.SeqRecord import SeqRecord
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
6 from Bio.SeqFeature import SeqFeature, FeatureLocation
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
7 import argparse
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
8 from BCBio import GFF
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
9 import logging
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
10 logging.basicConfig(level=logging.INFO)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
11 log = logging.getLogger(__name__)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
12
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
13
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
def parse_xmfa(xmfa):
    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544

    Lazily yields one alignment block (LCB) at a time.

    :param xmfa: an open text-mode handle, or any iterable of lines, holding
        XMFA data. Lines starting with ``#`` are ignored, a line consisting
        of ``=`` terminates a block, and ``>`` lines are sequence headers of
        the form ``> <id>:<start>-<end> <strand> [<extra> ...]``.
    :return: a generator of lists. Each list contains one dict per sequence
        in the block with the keys ``rid`` (header fields joined by ``_``),
        ``id``, ``start``, ``end``, ``strand`` (1 for ``+``, otherwise -1)
        and ``seq`` (the concatenated alignment row).
    :raises ValueError: if a header's ``id:start-end`` field is malformed.
    :raises IndexError: if a header has fewer than three fields.
    :raises KeyError: if sequence data appears before any header.
    """
    current_lcb = []
    current_seq = {}
    # Iterate the handle directly so large alignments are streamed instead of
    # being loaded completely into memory.
    for raw_line in xmfa:
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        if not line:
            # Blank lines carry no data. Without this guard a blank line
            # following '=' would hit the sequence branch with an empty
            # record and raise KeyError.
            continue

        if line == '=':
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            # A new header closes the previous sequence of this block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
            data = line.split()
            seq_id, loc = data[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(data[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if data[2] == '+' else -1,
                'seq': ''
            }
        else:
            current_seq['seq'] += line

    # Flush a final block that is not terminated by '=' rather than
    # silently discarding it.
    if 'id' in current_seq:
        current_lcb.append(current_seq)
    if current_lcb:
        yield current_lcb
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
48
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
49
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
50 def _percent_identity(a, b):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
51 """Calculate % identity, ignoring gaps in the host sequence
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
52 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
53 match = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
54 mismatch = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
55 for char_a, char_b in zip(list(a), list(b)):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
56 if char_a == '-':
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
57 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
58 if char_a == char_b:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
59 match += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
60 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
61 mismatch += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
62
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
63 if match + mismatch == 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
64 return 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
65 return 100 * float(match) / (match + mismatch)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
66
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
67
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
68 def _id_tn_dict(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
69 """Figure out sequence IDs
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
70 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
71 label_convert = {}
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
72 if sequences is not None:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
73 if len(sequences) == 1:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
74 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
75 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
76 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
77 for i, sequence in enumerate(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
78 for record in SeqIO.parse(sequence, 'fasta'):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
79 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
80 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
81 return label_convert
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
82
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
83
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
    """Turn the blocks of an XMFA alignment into GFF3-style match features.

    For every block that contains the ``relative_to`` sequence plus at least
    one other sequence, one ``match`` feature per other sequence is created,
    positioned on the reference's span. The block is then walked in windows
    of ``window_size`` alignment columns, and each window with a non-zero
    percent identity is attached as a ``match_part`` sub-feature scored with
    that identity.

    :param xmfa_file: open XMFA handle, passed to ``parse_xmfa``.
    :param relative_to: sequence number (string) of the reference genome.
    :param sequences: optional list of FASTA inputs used to replace sequence
        numbers with record IDs (see ``_id_tn_dict``).
    :param window_size: number of alignment columns per ``match_part``
        window; must be non-zero, as it is used as a ``range`` step.
    :return: a list holding a single ``SeqRecord`` whose ``features`` are the
        generated ``match`` features.
    """
    label_convert = _id_tn_dict(sequences)

    lcbs = parse_xmfa(xmfa_file)

    # Placeholder record: only its ID and feature list matter here.
    records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
    for lcb in lcbs:
        ids = [seq['id'] for seq in lcb]

        # Doesn't match part of our sequence
        if relative_to not in ids:
            continue

        # Skip sequences that are JUST our "relative_to" genome
        if len(ids) == 1:
            continue

        parent = [seq for seq in lcb if seq['id'] == relative_to][0]
        others = [seq for seq in lcb if seq['id'] != relative_to]

        for other in others:
            # NOTE(review): the parsed start/end are passed through as
            # (start, end + 1) -- confirm this matches the coordinate
            # convention expected by the GFF writer.
            feature = SeqFeature(
                FeatureLocation(parent['start'], parent['end'] + 1),
                type="match", strand=parent['strand'],
                qualifiers={
                    "source": "progressiveMauve",
                    "target": label_convert.get(other['id'], other['id']),
                    "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
                }
            )
            # Make sure the container for match_part children exists even if
            # the installed SeqFeature class does not create it itself.
            if not hasattr(feature, 'sub_features'):
                feature.sub_features = []
            other['feature'] = feature

        # The strand of the windows depends only on the reference entry, so
        # compute it once per block instead of once per window.
        # NOTE(review): this uses the sign of parent['start'], whereas the
        # enclosing match feature uses parent['strand'] -- confirm which one
        # is intended for match_part features.
        if parent['start'] < 0:
            strand = -1
        else:
            strand = 1

        # Walk the reference row itself (rather than whichever row happens to
        # be first in the block) so windows always line up with the slices
        # taken from it below.
        parent_seq = parent['seq']
        for i in range(0, len(parent_seq), window_size):
            block_seq = parent_seq[i:i + window_size]
            real_window_size = len(block_seq)
            # Reference position of the window: block start plus the number
            # of non-gap reference columns that precede the window.
            real_start = abs(parent['start']) - parent_seq[0:i].count('-') + i
            real_end = real_start + real_window_size - block_seq.count('-')

            # Skip windows covering fewer than 10 ungapped reference columns.
            if (real_end - real_start) < 10:
                continue

            for other in others:
                pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
                # Ignore 0% identity sequences
                if pid == 0:
                    continue
                other['feature'].sub_features.append(
                    SeqFeature(
                        FeatureLocation(real_start, real_end),
                        type="match_part", strand=strand,
                        qualifiers={
                            "source": "progressiveMauve",
                            'score': pid
                        }
                    )
                )

        for other in others:
            records[0].features.append(other['feature'])
    return records
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
148
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
149
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, run the XMFA -> GFF3
    # conversion, and write the resulting records to standard output.
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    # argparse.FileType('r') opens the given path for reading. It replaces the
    # Python 2-only `file` builtin, which raises NameError on Python 3.
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    # Every parsed option is forwarded by name as a keyword argument, so the
    # argument names above must match the parameters of convert_xmfa_to_gff3.
    result = convert_xmfa_to_gff3(**vars(args))
    # NOTE(review): GFF is expected to be imported at file level (the import is
    # not visible in this chunk) - confirm before relying on it.
    GFF.write(result, sys.stdout)