diff check_bcfile.py @ 2:bd5ff77e2036 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Wed, 15 Jul 2020 17:33:16 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_bcfile.py	Wed Jul 15 17:33:16 2020 -0400
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument('bcfile', help='barcode file')
+args = parser.parse_args()
+
+barcodes = []
+
+with open(args.bcfile, "r") as fh:
+    for line in fh:
+        if len(line) == 0:
+            continue
+        if line.startswith("#"):
+            continue
+        barcodes.append(line.split())
+
+if len(barcodes) <= 1:
+    sys.exit("barcode file is empty")
+
+# check that all lines have the same number of columns
+ncol = None
+for bc in barcodes:
+    if ncol is None:
+        ncol = len(bc)
+    elif ncol != len(bc):
+        sys.exit("barcode file has inconsistent number of columns")
+
+isname = False
+for bc in barcodes:
+    if len(bc[-1].strip("ATCGatcg")) > 0:
+        isname = True
+        break
+
+names = set()
+for bc in barcodes:
+    if isname:
+        n = bc[-1]
+    else:
+        n = '-'.join(bc)
+    if n in names:
+        sys.exit("duplicate sample %s in barcode file" % n)
+    names.add(n)