comparison demultiplex.py @ 3:b6d63b9efb8f draft

Uploaded
author davidvanzessen
date Fri, 09 Nov 2018 05:52:15 -0500
parents 36c79869620b
children 146bbd9d58f6
comparison
Legend: equal | deleted | inserted | replaced
Left column: revision 2:500c2eee063d; right column: revision 3:b6d63b9efb8f
15 def sniff_format(file_path): 15 def sniff_format(file_path):
16 """ 16 """
17 Try to guess the file format (fastq/fasta) by looking at the first character of the first line. 17 Try to guess the file format (fastq/fasta) by looking at the first character of the first line.
18 Should be '@' for fastq and '>' for fasta. 18 Should be '@' for fastq and '>' for fasta.
19 """ 19 """
20 with open(file_path, 'rU') as file_handle: 20 with open(file_path, 'r') as file_handle:
21 for line in file_handle: 21 for line in file_handle:
22 if line.startswith("@"): 22 if line.startswith("@"):
23 return "fastq" 23 return "fastq"
24 if line.startswith(">"): 24 if line.startswith(">"):
25 return "fasta" 25 return "fasta"
26 break 26 break
27 return None 27 return None
28 28
29 29
30 def search_barcode_in_first_half(sequence, barcode): 30 def search_barcode_in_first_half(sequence, barcode):
31 if type(sequence) is Seq: 31 if type(sequence) is Seq:
32 sequence = str(sequence) 32 sequence = str(sequence)
33 elif type(sequence) is SeqRecord: 33 elif type(sequence) is SeqRecord:
34 sequence = str(sequence.seq) 34 sequence = str(sequence.seq)
146 ) 146 )
147 147
148 total_sequences = 0 148 total_sequences = 0
149 sequences_assigned_by_id = defaultdict(int) 149 sequences_assigned_by_id = defaultdict(int)
150 150
151 with open(input_file_path, 'rU') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle: 151 with open(input_file_path, 'r') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle:
152 for record in SeqIO.parse(input_file_handle, input_format): 152 for record in SeqIO.parse(input_file_handle, input_format):
153 total_sequences += 1 153 total_sequences += 1
154 for ID, barcode_datas in barcode_data_dict.items(): 154 for ID, barcode_datas in barcode_data_dict.items():
155 barcode_position, barcode_data, reverse = search_barcodes_in_sequence(barcode_datas, record) 155 barcode_position, barcode_data, reverse = search_barcodes_in_sequence(barcode_datas, record)
156 if barcode_position == -1: 156 if barcode_position == -1: