annotate prepare_amplicon_info.py @ 15:ede363a64152 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 51d656555f9635bb8988f9eec904ee66657736e1
author iuc
date Wed, 19 Apr 2023 08:30:27 +0000
parents e319b5b65879
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
5e668dc9f379 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
iuc
parents:
diff changeset
#!/usr/bin/env python

# extends ivar trim's amplicon info parsing abilities
# to include calculation of amplicon regions from
# sets of nested (more than two) primers

import sys


def _parse_primer_starts(bed_lines):
    """Map every primer name in a primer BED file to its start position.

    For a '+' strand primer the start is the ``chromStart`` column; for a
    '-' strand primer it is ``chromEnd - 1``.  Blank lines are skipped.

    Args:
        bed_lines: iterable of text lines of the TAB-separated BED file.

    Returns:
        dict mapping primer name (column 4) to an integer start position.

    Raises:
        SystemExit: if a line has fewer than six columns, a non-integer
            coordinate, or a strand other than '+' / '-'.
    """
    primer_starts = {}
    for line in bed_lines:
        line = line.strip()
        if not line:
            continue
        fields = line.split('\t')
        try:
            strand = fields[5]
            if strand == '+':
                primer_starts[fields[3]] = int(fields[1])
            elif strand == '-':
                primer_starts[fields[3]] = int(fields[2]) - 1
            else:
                # funnel an unknown strand into the shared format error below
                raise ValueError()
        except (IndexError, ValueError):
            sys.exit(
                'Primer BED file needs to be TAB-separated with the '
                'following columns: '
                'chrom, chromStart, chromEnd, name, score, strand, '
                'where "chromStart", "chromEnd" need to be integer values '
                'and "strand" needs to be either "+" or "-".'
            )
    return primer_starts


def _outer_primer_lines(info_lines, primer_starts):
    """Reduce each amplicon info line to its two outermost primers.

    Every non-blank line lists the TAB-separated names of the primers of
    one amplicon.  The primer with the smallest start and the primer with
    the largest start are kept.

    Args:
        info_lines: iterable of text lines of the amplicon info file.
        primer_starts: dict of primer name -> start position.

    Returns:
        list of ``'<first>\\t<last>\\n'`` strings, one per amplicon.

    Raises:
        SystemExit: if a primer name is unknown, or if a line does not
            yield two distinct outer primers.
    """
    ret_lines = []
    for line in info_lines:
        line = line.strip()
        if not line:
            continue
        first = last = None
        for pname in line.split('\t'):
            try:
                primer_start = primer_starts[pname]
            except KeyError:
                sys.exit(
                    'Amplicon info with primer name not found in '
                    f'primer BED file: "{pname}"'
                )
            # strict comparisons: on ties the earlier-listed primer wins
            if first is None or primer_start < primer_starts[first]:
                first = pname
            if last is None or primer_start > primer_starts[last]:
                last = pname
        if first == last:
            # a single primer (or primers sharing one start) is no amplicon
            sys.exit(f'{line} is not a proper amplicon info line.')
        ret_lines.append(f'{first}\t{last}\n')
    return ret_lines


def main(argv=None):
    """Rewrite an amplicon info file to list only outer primer pairs.

    Args:
        argv: argument vector laid out like ``sys.argv``
            (program name, primer BED path, amplicon info path, output
            path); defaults to ``sys.argv``.

    Raises:
        SystemExit: on missing arguments or malformed input files.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(
            'Usage: prepare_amplicon_info.py '
            '<primer_bed> <amplicon_info> <output>'
        )

    # parse primers and their start positions from BED file
    with open(argv[1]) as i:
        primer_starts = _parse_primer_starts(i)

    # parse amplicon info and record outer primer names
    with open(argv[2]) as i:
        ret_lines = _outer_primer_lines(i, primer_starts)

    # write amended amplicon info
    # (only reached when both inputs parsed without error)
    with open(argv[3], 'w') as o:
        o.writelines(ret_lines)


if __name__ == '__main__':
    main()