diff demultiplex.py @ 4:146bbd9d58f6 draft default tip

Added tests
author davidvanzessen
date Mon, 12 Nov 2018 09:33:34 -0500
parents b6d63b9efb8f
children
line wrap: on
line diff
--- a/demultiplex.py	Fri Nov 09 05:52:15 2018 -0500
+++ b/demultiplex.py	Mon Nov 12 09:33:34 2018 -0500
@@ -29,17 +29,17 @@
 
 def search_barcode_in_first_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, 0, int(len(sequence) / 2))
 
 
 def search_barcode_in_second_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, int(len(sequence) / 2))
 
 
@@ -65,10 +65,10 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--input", help="The input file")
+    parser.add_argument("-i", "--input", help="The input file", required=True)
     parser.add_argument("-f", "--format", help="The format of the input file (fastq/fasta)", default="auto", choices=["fasta", "fastq", "auto"])
-    parser.add_argument("-o", "--output-dir", help="The output dir")
-    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)")
+    parser.add_argument("-o", "--output-dir", help="The output dir", required=True)
+    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)", required=True)
 
     args = parser.parse_args()
 
@@ -121,9 +121,11 @@
         ID = ID_barcode["ID"]
         barcode = ID_barcode["barcode"]
 
+        logging.info("{0}:\t\t{1}".format(ID, barcode))
+
         output_file_path = os.path.join(
             output_dir,
-            "{0}_{1}.{2}".format(input_basename_no_ext, ID, input_format)
+            "{0}.{1}".format(ID, input_format)
         )
 
         if ID not in ID_file_handle_dict:
@@ -134,15 +136,15 @@
 
         barcode_data_dict[ID] += [BarcodeData(
             ID=ID,
-            barcode=barcode,
-            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()),
+            barcode=barcode.lower(),
+            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()).lower(),
             output_file_path=output_file_path,
             output_file_handle=ID_file_handle
         )]
 
     discarded_output_file_path = os.path.join(
         output_dir,
-        "{0}_{1}.{2}".format(basename_input_file_path, "discarded", input_format)
+        "{0}.{1}".format("discarded", input_format)
     )
 
     total_sequences = 0