# HG changeset patch # User hepcat72 # Date 1472498480 14400 # Node ID e3f91eee8c7521add38d113332e00ee6c80b6af9 # Parent 50df2d629d5197f52d4f674bbe4da97df245d632 Uploaded diff -r 50df2d629d51 -r e3f91eee8c75 barcode_splitter_multi/._barcode_splitter.py Binary file barcode_splitter_multi/._barcode_splitter.py has changed diff -r 50df2d629d51 -r e3f91eee8c75 barcode_splitter_multi/barcode_splitter.py --- a/barcode_splitter_multi/barcode_splitter.py Fri Aug 26 16:30:56 2016 -0400 +++ b/barcode_splitter_multi/barcode_splitter.py Mon Aug 29 15:21:20 2016 -0400 @@ -10,7 +10,7 @@ from collections import defaultdict import subprocess -__version__ = "0.12" +__version__ = "0.13" __author__ = "Lance Parsons & Robert Leach" __author_email__ = "lparsons@princeton.edu,rleach@princeton.edu" __copyright__ = "Copyright 2011, Lance Parsons & Robert leach" @@ -244,6 +244,11 @@ cur_outputs = unmatchedOutputs unmatched_path = getBarcodeMatchPath(approx_bc_dict,index_seqs) incrementNDDictInt(unmatched_counts,unmatched_path) + if (not UNMATCHED in unmatched_path and + not MULTIMATCHED in unmatched_path): + sys.stderr.write('WARNING: Sequences match barcodes on ' + 'different rows: %s for sequence ID: %s\n' + %(index_seqs, prim_index_read['seq_id'])) elif(MULTIMATCHED in barcode_path): cur_outputs = multimatchedOutputs unmatched_path = getBarcodeMatchPath(approx_bc_dict,barcode_path) @@ -638,18 +643,16 @@ are not matrching instead of listing all sequences as simply "UNMATCHED". It also reduces complexity of the unmatched output by not including specific barcodes, which do not matter in unmatched/multimatched cases.''' - cur_dict = in_dict dim = 0 path = [] for cur_key in keys_list: dim += 1 if cur_key is MATCHED or cur_key is MULTIMATCHED: path.append(cur_key) - elif cur_key in cur_dict: + elif cur_key in in_dict[ANY][str(dim)]: path.append(MATCHED) else: path.append(UNMATCHED) - cur_dict = in_dict[ANY][str(dim)] return path def setNDApproxDictVal(in_dict, keys_list, mismatches):