diff write_amplicon_info_file.py @ 9:8d36959b000d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit f09d0bee3e957564beccb1bdb3610de02f639ec7"
author iuc
date Fri, 20 Aug 2021 20:34:11 +0000
parents 28a6f1908fcc
children ee29337f905c
line wrap: on
line diff
--- a/write_amplicon_info_file.py	Thu Aug 05 12:44:59 2021 +0000
+++ b/write_amplicon_info_file.py	Fri Aug 20 20:34:11 2021 +0000
@@ -3,48 +3,49 @@
 import argparse
 import re
 
-AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'
 
-
-def primer_info_to_position(name):
-    position = 0
-    re_match = re.match(AMPLICON_NAME_RE, name)
-    if re_match is None:
-        raise ValueError("{} does not match expected amplicon name format".format(name))
-    side = re_match.group('name')
-    num = re_match.group('num')
-    if side == 'RIGHT' or side == 'R':
-        position += 1000
-    if num is not None:
-        position += int(num)
-    return position
+AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)')
 
 
 def write_amplicon_info_file(bed_file, amplicon_info_file):
     amplicon_sets = {}
-    amplicon_ids = set()
     for line in bed_file:
         fields = line.strip().split('\t')
+        start = int(fields[1])
         name = fields[3]
-        re_match = re.match(AMPLICON_NAME_RE, name)
+        re_match = AMPLICON_PAT.match(name)
         if re_match is None:
-            raise ValueError("{} does not match expected amplicon name format".format(name))
+            raise ValueError(
+                '{} does not match expected amplicon name format'.format(name)
+            )
         amplicon_id = int(re_match.group('num'))
         amplicon_set = amplicon_sets.get(amplicon_id, [])
-        amplicon_set.append(name)
+        amplicon_set.append((name, start))
         amplicon_sets[amplicon_id] = amplicon_set
-        amplicon_ids.add(amplicon_id)
 
-    for id in sorted(list(amplicon_ids)):
-        amplicon_info = '\t'.join([name for name in sorted(amplicon_sets[id], key=primer_info_to_position)]) + '\n'
+    # write amplicons sorted by number with primers sorted by start position
+    for id in sorted(amplicon_sets):
+        amplicon_info = '\t'.join(
+            [name for name, start in sorted(
+                amplicon_sets[id], key=lambda x: x[1]
+            )]
+        ) + '\n'
         amplicon_info_file.write(amplicon_info)
     amplicon_info_file.close()
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions')
-    parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file')
-    parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format')
+    parser = argparse.ArgumentParser(
+        description='Write an amplicon info file for iVar '
+                    'from a BED file describing primer positions'
+    )
+    parser.add_argument(
+        'bed_file', type=argparse.FileType(), help='Primer BED file'
+    )
+    parser.add_argument(
+        'amplicon_info_file', type=argparse.FileType('w'),
+        help='Output file: amplicon info file in TSV format'
+    )
     args = parser.parse_args()
 
     write_amplicon_info_file(args.bed_file, args.amplicon_info_file)