Mercurial > repos > davidvanzessen > demultiplex_emc
diff demultiplex.py @ 4:146bbd9d58f6 draft default tip
Added tests
author | davidvanzessen |
---|---|
date | Mon, 12 Nov 2018 09:33:34 -0500 |
parents | b6d63b9efb8f |
children |
line wrap: on
line diff
--- a/demultiplex.py Fri Nov 09 05:52:15 2018 -0500
+++ b/demultiplex.py Mon Nov 12 09:33:34 2018 -0500
@@ -29,17 +29,17 @@
 def search_barcode_in_first_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, 0, int(len(sequence) / 2))
 def search_barcode_in_second_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, int(len(sequence) / 2))
@@ -65,10 +65,10 @@
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--input", help="The input file")
+    parser.add_argument("-i", "--input", help="The input file", required=True)
     parser.add_argument("-f", "--format", help="The format of the input file (fastq/fasta)", default="auto", choices=["fasta", "fastq", "auto"])
-    parser.add_argument("-o", "--output-dir", help="The output dir")
-    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)")
+    parser.add_argument("-o", "--output-dir", help="The output dir", required=True)
+    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)", required=True)
     args = parser.parse_args()
@@ -121,9 +121,11 @@
         ID = ID_barcode["ID"]
         barcode = ID_barcode["barcode"]
+        logging.info("{0}:\t\t{1}".format(ID, barcode))
+
         output_file_path = os.path.join(
             output_dir,
-            "{0}_{1}.{2}".format(input_basename_no_ext, ID, input_format)
+            "{0}.{1}".format(ID, input_format)
         )
         if ID not in ID_file_handle_dict:
@@ -134,15 +136,15 @@
         barcode_data_dict[ID] += [BarcodeData(
             ID=ID,
-            barcode=barcode,
-            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()),
+            barcode=barcode.lower(),
+            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()).lower(),
             output_file_path=output_file_path,
             output_file_handle=ID_file_handle
         )]
     discarded_output_file_path = os.path.join(
         output_dir,
-        "{0}_{1}.{2}".format(basename_input_file_path, "discarded", input_format)
+        "{0}.{1}".format("discarded", input_format)
     )
     total_sequences = 0