Mercurial > repos > iuc > ivar_variants
annotate write_amplicon_info_file.py @ 8:aea7008fe1f1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
author | iuc |
---|---|
date | Thu, 05 Aug 2021 12:44:05 +0000 |
parents | |
children | 3888bbe7a9ca |
rev | line source |
---|---|
8
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
2 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
3 import argparse |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
4 import re |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
5 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
# Pattern for primer names in column 4 of the primer BED file: an underscore,
# the amplicon number, another underscore, optional non-digit characters and
# then the primer side (L/LEFT or R/RIGHT), e.g. "nCoV-2019_7_LEFT".
# It is used with re.match(), so it is anchored at the start of the name only
# and any text after the side (such as "_alt1") is ignored.
AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'


def primer_info_to_position(name):
    """Return an integer sort key for a primer name.

    The key is the amplicon number parsed from ``name``, plus 1000 when the
    primer is a right-hand (R/RIGHT) primer. For primers that share an
    amplicon number this orders left primers before right primers.

    name: primer name matching AMPLICON_NAME_RE.

    Raises ValueError if ``name`` does not match AMPLICON_NAME_RE.
    """
    re_match = re.match(AMPLICON_NAME_RE, name)
    if re_match is None:
        raise ValueError("{} does not match expected amplicon name format".format(name))
    # Both named groups are mandatory parts of the pattern, so after a
    # successful match neither of them can be None.
    position = int(re_match.group('num'))
    if re_match.group('name') in ('R', 'RIGHT'):
        position += 1000
    return position
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
21 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
22 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
def write_amplicon_info_file(bed_file, amplicon_info_file):
    """Write an amplicon info file from a primer BED file.

    Primer names (BED column 4) are grouped by the amplicon number embedded
    in the name. One line is written per amplicon, in ascending order of
    amplicon number, holding the tab-separated names of that amplicon's
    primers ordered by primer_info_to_position().

    bed_file: iterable of tab-separated BED lines (e.g. an open text file).
    amplicon_info_file: writable text file object. It is closed once writing
        has finished or failed; it is left open if parsing the BED input
        raises before any output is written.

    Raises ValueError if a non-blank line has fewer than four columns or if
    a primer name does not match AMPLICON_NAME_RE.
    """
    amplicon_sets = {}
    for line_number, line in enumerate(bed_file, start=1):
        line = line.strip()
        if not line:
            # Blank lines (typically a trailing empty line) carry no primer
            # and would otherwise fail with an IndexError on fields[3].
            continue
        fields = line.split('\t')
        if len(fields) < 4:
            raise ValueError(
                "line {} of the BED file has fewer than 4 tab-separated columns: {}".format(line_number, line)
            )
        name = fields[3]
        re_match = re.match(AMPLICON_NAME_RE, name)
        if re_match is None:
            raise ValueError("{} does not match expected amplicon name format".format(name))
        amplicon_id = int(re_match.group('num'))
        amplicon_sets.setdefault(amplicon_id, []).append(name)

    try:
        for amplicon_id in sorted(amplicon_sets):
            # sorted() is stable, so primers with an equal key keep the
            # order in which they appeared in the BED file.
            primer_names = sorted(amplicon_sets[amplicon_id], key=primer_info_to_position)
            amplicon_info_file.write('\t'.join(primer_names) + '\n')
    finally:
        amplicon_info_file.close()
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
42 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
43 |
aea7008fe1f1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: read the primer BED file given as the first
    # argument and write the amplicon info file named by the second one.
    parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions')
    parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file')
    parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format')
    args = parser.parse_args()

    # argparse opened the input file for us; make sure it is closed again.
    # The output file is closed by write_amplicon_info_file() itself.
    with args.bed_file:
        write_amplicon_info_file(args.bed_file, args.amplicon_info_file)