# HG changeset patch # User davidvanzessen # Date 1541760735 18000 # Node ID b6d63b9efb8fc1bf24f7c88e89730d38d6585546 # Parent 500c2eee063d09883cd0b91df0b44205e7e2558c Uploaded diff -r 500c2eee063d -r b6d63b9efb8f demultiplex.py --- a/demultiplex.py Fri Nov 09 05:49:26 2018 -0500 +++ b/demultiplex.py Fri Nov 09 05:52:15 2018 -0500 @@ -17,7 +17,7 @@ Try to guess the file format (fastq/fasta) by looking at the first character of the first line. Should be '@' for fastq and '>' for fasta. """ - with open(file_path, 'rU') as file_handle: + with open(file_path, 'r') as file_handle: for line in file_handle: if line.startswith("@"): return "fastq" @@ -26,7 +26,7 @@ break return None - + def search_barcode_in_first_half(sequence, barcode): if type(sequence) is Seq: sequence = str(sequence) @@ -148,7 +148,7 @@ total_sequences = 0 sequences_assigned_by_id = defaultdict(int) - with open(input_file_path, 'rU') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle: + with open(input_file_path, 'r') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle: for record in SeqIO.parse(input_file_handle, input_format): total_sequences += 1 for ID, barcode_datas in barcode_data_dict.items():