comparison write_amplicon_info_file.py @ 9:8d36959b000d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit f09d0bee3e957564beccb1bdb3610de02f639ec7"
author iuc
date Fri, 20 Aug 2021 20:34:11 +0000
parents 28a6f1908fcc
children ee29337f905c
comparison
Legend: equal | deleted | inserted | replaced
Left column: revision 8:28a6f1908fcc (parent) | Right column: revision 9:8d36959b000d (this changeset)
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import argparse 3 import argparse
4 import re 4 import re
5 5
6 AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'
7 6
8 7 AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)')
9 def primer_info_to_position(name):
10 position = 0
11 re_match = re.match(AMPLICON_NAME_RE, name)
12 if re_match is None:
13 raise ValueError("{} does not match expected amplicon name format".format(name))
14 side = re_match.group('name')
15 num = re_match.group('num')
16 if side == 'RIGHT' or side == 'R':
17 position += 1000
18 if num is not None:
19 position += int(num)
20 return position
21 8
22 9
23 def write_amplicon_info_file(bed_file, amplicon_info_file): 10 def write_amplicon_info_file(bed_file, amplicon_info_file):
24 amplicon_sets = {} 11 amplicon_sets = {}
25 amplicon_ids = set()
26 for line in bed_file: 12 for line in bed_file:
27 fields = line.strip().split('\t') 13 fields = line.strip().split('\t')
14 start = int(fields[1])
28 name = fields[3] 15 name = fields[3]
29 re_match = re.match(AMPLICON_NAME_RE, name) 16 re_match = AMPLICON_PAT.match(name)
30 if re_match is None: 17 if re_match is None:
31 raise ValueError("{} does not match expected amplicon name format".format(name)) 18 raise ValueError(
19 '{} does not match expected amplicon name format'.format(name)
20 )
32 amplicon_id = int(re_match.group('num')) 21 amplicon_id = int(re_match.group('num'))
33 amplicon_set = amplicon_sets.get(amplicon_id, []) 22 amplicon_set = amplicon_sets.get(amplicon_id, [])
34 amplicon_set.append(name) 23 amplicon_set.append((name, start))
35 amplicon_sets[amplicon_id] = amplicon_set 24 amplicon_sets[amplicon_id] = amplicon_set
36 amplicon_ids.add(amplicon_id)
37 25
38 for id in sorted(list(amplicon_ids)): 26 # write amplicons sorted by number with primers sorted by start position
39 amplicon_info = '\t'.join([name for name in sorted(amplicon_sets[id], key=primer_info_to_position)]) + '\n' 27 for id in sorted(amplicon_sets):
28 amplicon_info = '\t'.join(
29 [name for name, start in sorted(
30 amplicon_sets[id], key=lambda x: x[1]
31 )]
32 ) + '\n'
40 amplicon_info_file.write(amplicon_info) 33 amplicon_info_file.write(amplicon_info)
41 amplicon_info_file.close() 34 amplicon_info_file.close()
42 35
43 36
44 if __name__ == '__main__': 37 if __name__ == '__main__':
45 parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions') 38 parser = argparse.ArgumentParser(
46 parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file') 39 description='Write an amplicon info file for iVar '
47 parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format') 40 'from a BED file describing primer positions'
41 )
42 parser.add_argument(
43 'bed_file', type=argparse.FileType(), help='Primer BED file'
44 )
45 parser.add_argument(
46 'amplicon_info_file', type=argparse.FileType('w'),
47 help='Output file: amplicon info file in TSV format'
48 )
48 args = parser.parse_args() 49 args = parser.parse_args()
49 50
50 write_amplicon_info_file(args.bed_file, args.amplicon_info_file) 51 write_amplicon_info_file(args.bed_file, args.amplicon_info_file)