Mercurial > repos > davidvanzessen > demultiplex_emc

--- a/demultiplex.py	Fri Nov 09 05:49:26 2018 -0500
+++ b/demultiplex.py	Fri Nov 09 05:52:15 2018 -0500
@@ -17,7 +17,7 @@
     Try to guess the file format (fastq/fasta) by looking at the first character of the first line.
     Should be '@' for fastq and '>' for fasta.
     """
-    with open(file_path, 'rU') as file_handle:
+    with open(file_path, 'r') as file_handle:
         for line in file_handle:
             if line.startswith("@"):
                 return "fastq"
@@ -26,7 +26,7 @@
             break
         return None

-
+
 def search_barcode_in_first_half(sequence, barcode):
     if type(sequence) is Seq:
         sequence = str(sequence)
@@ -148,7 +148,7 @@
     total_sequences = 0
     sequences_assigned_by_id = defaultdict(int)

-    with open(input_file_path, 'rU') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle:
+    with open(input_file_path, 'r') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle:
         for record in SeqIO.parse(input_file_handle, input_format):
             total_sequences += 1
             for ID, barcode_datas in barcode_data_dict.items():