Mercurial > repos > davidvanzessen > demultiplex_emc
comparison demultiplex.py @ 3:b6d63b9efb8f draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 09 Nov 2018 05:52:15 -0500 |
parents | 36c79869620b |
children | 146bbd9d58f6 |
comparison
equal
deleted
inserted
replaced
2:500c2eee063d | 3:b6d63b9efb8f |
---|---|
15 def sniff_format(file_path): | 15 def sniff_format(file_path): |
16 """ | 16 """ |
17 Try to guess the file format (fastq/fasta) by looking at the first character of the first line. | 17 Try to guess the file format (fastq/fasta) by looking at the first character of the first line. |
18 Should be '@' for fastq and '>' for fasta. | 18 Should be '@' for fastq and '>' for fasta. |
19 """ | 19 """ |
20 with open(file_path, 'rU') as file_handle: | 20 with open(file_path, 'r') as file_handle: |
21 for line in file_handle: | 21 for line in file_handle: |
22 if line.startswith("@"): | 22 if line.startswith("@"): |
23 return "fastq" | 23 return "fastq" |
24 if line.startswith(">"): | 24 if line.startswith(">"): |
25 return "fasta" | 25 return "fasta" |
26 break | 26 break |
27 return None | 27 return None |
28 | 28 |
29 | 29 |
30 def search_barcode_in_first_half(sequence, barcode): | 30 def search_barcode_in_first_half(sequence, barcode): |
31 if type(sequence) is Seq: | 31 if type(sequence) is Seq: |
32 sequence = str(sequence) | 32 sequence = str(sequence) |
33 elif type(sequence) is SeqRecord: | 33 elif type(sequence) is SeqRecord: |
34 sequence = str(sequence.seq) | 34 sequence = str(sequence.seq) |
146 ) | 146 ) |
147 | 147 |
148 total_sequences = 0 | 148 total_sequences = 0 |
149 sequences_assigned_by_id = defaultdict(int) | 149 sequences_assigned_by_id = defaultdict(int) |
150 | 150 |
151 with open(input_file_path, 'rU') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle: | 151 with open(input_file_path, 'r') as input_file_handle, open(discarded_output_file_path, 'w') as discarded_output_handle: |
152 for record in SeqIO.parse(input_file_handle, input_format): | 152 for record in SeqIO.parse(input_file_handle, input_format): |
153 total_sequences += 1 | 153 total_sequences += 1 |
154 for ID, barcode_datas in barcode_data_dict.items(): | 154 for ID, barcode_datas in barcode_data_dict.items(): |
155 barcode_position, barcode_data, reverse = search_barcodes_in_sequence(barcode_datas, record) | 155 barcode_position, barcode_data, reverse = search_barcodes_in_sequence(barcode_datas, record) |
156 if barcode_position == -1: | 156 if barcode_position == -1: |