Mercurial > repos > iuc > ivar_trim
comparison write_amplicon_info_file.py @ 9:c092052ed673 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit f09d0bee3e957564beccb1bdb3610de02f639ec7"
| author | iuc |
|---|---|
| date | Fri, 20 Aug 2021 20:35:46 +0000 |
| parents | 397e5f0eb3ef |
| children | 5671e1d3d5ee |
Comparison legend: equal, deleted, inserted, replaced
| 8:397e5f0eb3ef | 9:c092052ed673 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 | 2 |
| 3 import argparse | 3 import argparse |
| 4 import re | 4 import re |
| 5 | 5 |
| 6 AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)' | |
| 7 | 6 |
| 8 | 7 AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)') |
| 9 def primer_info_to_position(name): | |
| 10 position = 0 | |
| 11 re_match = re.match(AMPLICON_NAME_RE, name) | |
| 12 if re_match is None: | |
| 13 raise ValueError("{} does not match expected amplicon name format".format(name)) | |
| 14 side = re_match.group('name') | |
| 15 num = re_match.group('num') | |
| 16 if side == 'RIGHT' or side == 'R': | |
| 17 position += 1000 | |
| 18 if num is not None: | |
| 19 position += int(num) | |
| 20 return position | |
| 21 | 8 |
| 22 | 9 |
| 23 def write_amplicon_info_file(bed_file, amplicon_info_file): | 10 def write_amplicon_info_file(bed_file, amplicon_info_file): |
| 24 amplicon_sets = {} | 11 amplicon_sets = {} |
| 25 amplicon_ids = set() | |
| 26 for line in bed_file: | 12 for line in bed_file: |
| 27 fields = line.strip().split('\t') | 13 fields = line.strip().split('\t') |
| 14 start = int(fields[1]) | |
| 28 name = fields[3] | 15 name = fields[3] |
| 29 re_match = re.match(AMPLICON_NAME_RE, name) | 16 re_match = AMPLICON_PAT.match(name) |
| 30 if re_match is None: | 17 if re_match is None: |
| 31 raise ValueError("{} does not match expected amplicon name format".format(name)) | 18 raise ValueError( |
| 19 '{} does not match expected amplicon name format'.format(name) | |
| 20 ) | |
| 32 amplicon_id = int(re_match.group('num')) | 21 amplicon_id = int(re_match.group('num')) |
| 33 amplicon_set = amplicon_sets.get(amplicon_id, []) | 22 amplicon_set = amplicon_sets.get(amplicon_id, []) |
| 34 amplicon_set.append(name) | 23 amplicon_set.append((name, start)) |
| 35 amplicon_sets[amplicon_id] = amplicon_set | 24 amplicon_sets[amplicon_id] = amplicon_set |
| 36 amplicon_ids.add(amplicon_id) | |
| 37 | 25 |
| 38 for id in sorted(list(amplicon_ids)): | 26 # write amplicons sorted by number with primers sorted by start position |
| 39 amplicon_info = '\t'.join([name for name in sorted(amplicon_sets[id], key=primer_info_to_position)]) + '\n' | 27 for id in sorted(amplicon_sets): |
| 28 amplicon_info = '\t'.join( | |
| 29 [name for name, start in sorted( | |
| 30 amplicon_sets[id], key=lambda x: x[1] | |
| 31 )] | |
| 32 ) + '\n' | |
| 40 amplicon_info_file.write(amplicon_info) | 33 amplicon_info_file.write(amplicon_info) |
| 41 amplicon_info_file.close() | 34 amplicon_info_file.close() |
| 42 | 35 |
| 43 | 36 |
| 44 if __name__ == '__main__': | 37 if __name__ == '__main__': |
| 45 parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions') | 38 parser = argparse.ArgumentParser( |
| 46 parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file') | 39 description='Write an amplicon info file for iVar ' |
| 47 parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format') | 40 'from a BED file describing primer positions' |
| 41 ) | |
| 42 parser.add_argument( | |
| 43 'bed_file', type=argparse.FileType(), help='Primer BED file' | |
| 44 ) | |
| 45 parser.add_argument( | |
| 46 'amplicon_info_file', type=argparse.FileType('w'), | |
| 47 help='Output file: amplicon info file in TSV format' | |
| 48 ) | |
| 48 args = parser.parse_args() | 49 args = parser.parse_args() |
| 49 | 50 |
| 50 write_amplicon_info_file(args.bed_file, args.amplicon_info_file) | 51 write_amplicon_info_file(args.bed_file, args.amplicon_info_file) |
