#!/usr/bin/env python3
"""
Assign Ig sequences into clones
"""
# Info
__author__ = 'Namita Gupta, Jason Anthony Vander Heiden, Gur Yaari, Mohamed Uduman'
from changeo import __version__, __date__

# Imports
import os
import re
import sys
import csv
import numpy as np
from argparse import ArgumentParser
from collections import OrderedDict
from itertools import chain
from textwrap import dedent
from time import time
from Bio import pairwise2
from Bio.Seq import translate

# Presto and changeo imports
from presto.Defaults import default_out_args
from presto.IO import getFileType, getOutputHandle, printLog, printProgress
from presto.Multiprocessing import manageProcesses
from presto.Sequence import getDNAScoreDict
from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs
from changeo.Distance import distance_models, calcDistances, formClusters
from changeo.IO import getDbWriter, readDbFile, countDbFile
from changeo.Multiprocessing import DbData, DbResult

# Set maximum field size for csv.reader
csv.field_size_limit(sys.maxsize)

# Defaults
default_translate = False
default_distance = 0.0
default_index_mode = 'gene'
default_index_action = 'set'
default_bygroup_model = 'ham'
default_hclust_model = 'chen2010'
default_seq_field = 'JUNCTION'
default_norm = 'len'
default_sym = 'avg'
default_linkage = 'single'
choices_bygroup_model = ('ham', 'aa', 'hh_s1f', 'hh_s5f', 'mk_rs1nf', 'mk_rs5nf', 'hs1f_compat', 'm1n_compat')


def indexByIdentity(index, key, rec, fields=None):
    """
    Updates a preclone index with a simple key

    Arguments:
    index = preclone index from indexJunctions
    key = index key
    rec = IgRecord to add to the index
    fields = additional annotation fields to use to group preclones;
             if None use only V, J and junction length

    Returns:
    None. Updates index with new key and records.
    """
    index.setdefault(tuple(key), []).append(rec)


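# Illustrative sketch (not part of the pipeline): with action='first' each key is a
# flat (V, J, junction length) tuple and records sharing it are appended to one list.
# The gene names and the rec object below are assumptions.
#
#     clone_index = {}
#     indexByIdentity(clone_index, ['IGHV3-23', 'IGHJ4', 48], rec)
#     # clone_index == {('IGHV3-23', 'IGHJ4', 48): [rec]}

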
def indexByUnion(index, key, rec, fields=None):
    """
    Updates a preclone index with the union of nested keys

    Arguments:
    index = preclone index from indexJunctions
    key = index key
    rec = IgRecord to add to the index
    fields = additional annotation fields to use to group preclones;
             if None use only V, J and junction length

    Returns:
    None. Updates index with new key and records.
    """
    # List of values for this/new key
    val = [rec]
    f_range = list(range(2, 3 + (len(fields) if fields else 0)))

    # See if field/junction length combination exists in index
    outer_dict = index
    for field in f_range:
        try:
            outer_dict = outer_dict[key[field]]
        except KeyError:
            outer_dict = None
            break
    # If field combination exists, look through Js
    j_matches = []
    if outer_dict is not None:
        for j in outer_dict.keys():
            if not set(key[1]).isdisjoint(set(j)):
                key[1] = tuple(set(key[1]).union(set(j)))
                j_matches += [j]
    # If J overlap exists, look through Vs for each J
    for j in j_matches:
        v_matches = []
        # Collect V matches for this J
        for v in outer_dict[j].keys():
            if not set(key[0]).isdisjoint(set(v)):
                key[0] = tuple(set(key[0]).union(set(v)))
                v_matches += [v]
        # If there are V overlaps for this J, pop them out
        if v_matches:
            val += list(chain(*(outer_dict[j].pop(v) for v in v_matches)))
            # If the J dict is now empty, remove it
            if not outer_dict[j]:
                outer_dict.pop(j, None)

    # Add value(s) into the nested index dictionary; outer_dict aliases the
    # nested dictionaries inside index, so the updates below modify index in place
    # Add field dictionaries into index
    outer_dict = index
    for field in f_range:
        outer_dict.setdefault(key[field], {})
        outer_dict = outer_dict[key[field]]
    # Add J, then V into index
    if key[1] in outer_dict:
        outer_dict[key[1]].update({key[0]: val})
    else:
        outer_dict[key[1]] = {key[0]: val}


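# Illustrative sketch (not part of the pipeline): with action='set' the index is nested
# as index[junction length][extra fields...][J set][V set] = [IgRecords], and entries
# whose ambiguous V/J gene sets overlap are merged. Gene names and records are assumptions.
#
#     idx = {}
#     indexByUnion(idx, [('IGHV1-2',), ('IGHJ4',), 48], rec1)
#     indexByUnion(idx, [('IGHV1-2', 'IGHV1-3'), ('IGHJ4',), 48], rec2)
#     # idx == {48: {('IGHJ4',): {('IGHV1-2', 'IGHV1-3'): [rec2, rec1]}}}
#     # (tuple element order may vary)

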
def indexJunctions(db_iter, fields=None, mode=default_index_mode,
                   action=default_index_action):
    """
    Identifies preclonal groups by V, J and junction length

    Arguments:
    db_iter = an iterator of IgRecords defined by readDbFile
    fields = additional annotation fields to use to group preclones;
             if None use only V, J and junction length
    mode = specificity of alignment call to use for assigning preclones;
           one of ('allele', 'gene')
    action = how to handle multiple value fields when assigning preclones;
             one of ('first', 'set')

    Returns:
    a dictionary of {(V, J, junction length): [IgRecords]}
    """
    # Define functions for grouping keys
    if mode == 'allele' and fields is None:
        def _get_key(rec, act):
            return [rec.getVAllele(act), rec.getJAllele(act),
                    None if rec.junction is None else len(rec.junction)]
    elif mode == 'gene' and fields is None:
        def _get_key(rec, act):
            return [rec.getVGene(act), rec.getJGene(act),
                    None if rec.junction is None else len(rec.junction)]
    elif mode == 'allele' and fields is not None:
        def _get_key(rec, act):
            vdj = [rec.getVAllele(act), rec.getJAllele(act),
                   None if rec.junction is None else len(rec.junction)]
            ann = [rec.toDict().get(k, None) for k in fields]
            return list(chain(vdj, ann))
    elif mode == 'gene' and fields is not None:
        def _get_key(rec, act):
            vdj = [rec.getVGene(act), rec.getJGene(act),
                   None if rec.junction is None else len(rec.junction)]
            ann = [rec.toDict().get(k, None) for k in fields]
            return list(chain(vdj, ann))

    # Function to flatten nested dictionary
    def _flatten_dict(d, parent_key=''):
        items = []
        for k, v in d.items():
            new_key = parent_key + [k] if parent_key else [k]
            if isinstance(v, dict):
                items.extend(_flatten_dict(v, new_key).items())
            else:
                items.append((new_key, v))
        flat_dict = {None if None in i[0] else tuple(i[0]): i[1] for i in items}
        return flat_dict

    if action == 'first':
        index_func = indexByIdentity
    elif action == 'set':
        index_func = indexByUnion
    else:
        sys.exit('Unrecognized action: %s.\n' % action)

    start_time = time()
    clone_index = {}
    rec_count = 0
    for rec in db_iter:
        key = _get_key(rec, action)

        # Print progress
        if rec_count == 0:
            print('PROGRESS> Grouping sequences')

        printProgress(rec_count, step=1000, start_time=start_time)
        rec_count += 1

        # Assign records with a complete key to that key; index failed records under None
        if all([k is not None and k != '' for k in key]):
            # Update index dictionary
            index_func(clone_index, key, rec, fields)
        else:
            clone_index.setdefault(None, []).append(rec)

    printProgress(rec_count, step=1000, start_time=start_time, end=True)

    if action == 'set':
        clone_index = _flatten_dict(clone_index)

    return clone_index


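# Illustrative usage (the input file name is an assumption):
#
#     db_iter = readDbFile('db-pass.tab')
#     groups = indexJunctions(db_iter, mode='gene', action='first')
#     # groups maps (V gene, J gene, junction length) keys to lists of IgRecords;
#     # records missing any key component are collected under the None key.

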
def distanceClones(records, model=default_bygroup_model, distance=default_distance,
                   dist_mat=None, norm=default_norm, sym=default_sym,
                   linkage=default_linkage, seq_field=default_seq_field):
    """
    Separates a set of IgRecords into clones

    Arguments:
    records = an iterator of IgRecords
    model = substitution model used to calculate distance
    distance = the distance threshold to assign clonal groups
    dist_mat = pandas DataFrame of pairwise nucleotide or amino acid distances
    norm = normalization method
    sym = symmetry method
    linkage = type of linkage
    seq_field = sequence field used to calculate distance between records

    Returns:
    a dictionary of lists defining {clone number: [IgRecords clonal group]}
    """
    # Get distance matrix if not provided
    if dist_mat is None:
        try:
            dist_mat = distance_models[model]
        except KeyError:
            sys.exit('Unrecognized distance model: %s' % model)

    # TODO: can be cleaned up with abstract model class
    # Determine length of n-mers
    if model in ['hs1f_compat', 'm1n_compat', 'aa', 'ham', 'hh_s1f', 'mk_rs1nf']:
        nmer_len = 1
    elif model in ['hh_s5f', 'mk_rs5nf']:
        nmer_len = 5
    else:
        sys.exit('Unrecognized distance model: %s.\n' % model)

    # Define unique junction mapping
    seq_map = {}
    for ig in records:
        seq = ig.getSeqField(seq_field)
        # Check if sequence length is 0
        if len(seq) == 0:
            return None

        seq = re.sub(r'[.-]', 'N', str(seq))
        if model == 'aa': seq = translate(seq)

        seq_map.setdefault(seq, []).append(ig)

    # Process records
    if len(seq_map) == 1:
        return {1: records}

    # Define sequences
    seqs = list(seq_map.keys())

    # Calculate pairwise distance matrix
    dists = calcDistances(seqs, nmer_len, dist_mat, sym=sym, norm=norm)

    # Perform hierarchical clustering
    clusters = formClusters(dists, linkage, distance)

    # Turn clusters into clone dictionary
    clone_dict = {}
    for i, c in enumerate(clusters):
        clone_dict.setdefault(c, []).extend(seq_map[seqs[i]])

    return clone_dict


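# Illustrative usage (the group key, records and threshold are assumptions):
#
#     group = clone_index[('IGHV3-23', 'IGHJ4', 48)]
#     clones = distanceClones(group, model='ham', distance=0.15, norm='len')
#     # clones == {1: [IgRecord, ...], 2: [IgRecord, ...], ...}, one entry per
#     # clonal cluster found within this V/J/junction-length group.

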
def distChen2010(records):
    """
    Calculate pairwise distances as defined in Chen 2010

    Arguments:
    records = list of IgRecords where first is query to be compared to others in list

    Returns:
    list of distances
    """
    # Pull out query sequence and V/J information
    query = records.pop(0)
    query_cdr3 = query.junction[3:-3]
    query_v_allele = query.getVAllele()
    query_v_gene = query.getVGene()
    query_v_family = query.getVFamily()
    query_j_allele = query.getJAllele()
    query_j_gene = query.getJGene()
    # Create alignment scoring dictionary
    score_dict = getDNAScoreDict()

    scores = [0] * len(records)
    for i in range(len(records)):
        # Score of the best global alignment; pairwise2 alignments are
        # (seqA, seqB, score, begin, end) tuples
        ld = pairwise2.align.globalds(query_cdr3, records[i].junction[3:-3],
                                      score_dict, -1, -1, one_alignment_only=True)[0][2]
        # Check V similarity
        if records[i].getVAllele() == query_v_allele: ld += 0
        elif records[i].getVGene() == query_v_gene: ld += 1
        elif records[i].getVFamily() == query_v_family: ld += 3
        else: ld += 5
        # Check J similarity
        if records[i].getJAllele() == query_j_allele: ld += 0
        elif records[i].getJGene() == query_j_gene: ld += 1
        else: ld += 3
        # Divide by length
        scores[i] = ld / max(len(records[i].junction[3:-3]), len(query_cdr3))

    return scores


def distAdemokun2011(records):
    """
    Calculate pairwise distances as defined in Ademokun 2011

    Arguments:
    records = list of IgRecords where first is query to be compared to others in list

    Returns:
    list of distances
    """
    # Pull out query sequence and V family information
    query = records.pop(0)
    query_cdr3 = query.junction[3:-3]
    query_v_family = query.getVFamily()
    # Create alignment scoring dictionary
    score_dict = getDNAScoreDict()

    scores = [0] * len(records)
    for i in range(len(records)):

        if abs(len(query_cdr3) - len(records[i].junction[3:-3])) > 10:
            scores[i] = 1
        elif query_v_family != records[i].getVFamily():
            scores[i] = 1
        else:
            # Score of the best global alignment; pairwise2 alignments are
            # (seqA, seqB, score, begin, end) tuples
            ld = pairwise2.align.globalds(query_cdr3, records[i].junction[3:-3],
                                          score_dict, -1, -1, one_alignment_only=True)[0][2]
            scores[i] = ld / min(len(records[i].junction[3:-3]), len(query_cdr3))

    return scores


def hierClust(dist_mat, method='chen2010'):
    """
    Calculate hierarchical clustering

    Arguments:
    dist_mat = square-formed distance matrix of pairwise CDR3 comparisons
    method = clustering method; one of ('chen2010', 'ademokun2011')

    Returns:
    list of cluster ids
    """
    if method == 'chen2010':
        clusters = formClusters(dist_mat, 'average', 0.32)
    elif method == 'ademokun2011':
        clusters = formClusters(dist_mat, 'complete', 0.25)
    else:
        clusters = np.ones(dist_mat.shape[0])

    return clusters

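# Illustrative sketch (values are assumptions): pairs closer than the method threshold
# (0.32 for chen2010, 0.25 for ademokun2011) end up in the same cluster.
#
#     dist_mat = np.array([[0.0, 0.1], [0.1, 0.0]])
#     hierClust(dist_mat, method='chen2010')  # e.g. [1, 1]
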

# TODO: Merge duplicate feed, process and collect functions.
def feedQueue(alive, data_queue, db_file, group_func, group_args={}):
    """
    Feeds the data queue with Ig records

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    data_queue = a multiprocessing.Queue to hold data for processing
    db_file = the Ig record database file
    group_func = the function to use for assigning preclones
    group_args = a dictionary of arguments to pass to group_func

    Returns:
    None
    """
    # Open input file and perform grouping
    try:
        # Iterate over Ig records and assign groups
        db_iter = readDbFile(db_file)
        clone_dict = group_func(db_iter, **group_args)
    except:
        alive.value = False
        raise

    # Add groups to data queue
    try:
        # Iterate over groups and feed data queue
        clone_iter = iter(clone_dict.items())
        while alive.value:
            # Get data from queue
            if data_queue.full(): continue
            else: data = next(clone_iter, None)
            # Exit upon reaching end of iterator
            if data is None: break

            # Feed queue
            data_queue.put(DbData(*data))
        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None
    except:
        alive.value = False
        raise

    return None


def feedQueueClust(alive, data_queue, db_file, group_func=None, group_args={}):
    """
    Feeds the data queue with Ig records

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    data_queue = a multiprocessing.Queue to hold data for processing
    db_file = the Ig record database file

    Returns:
    None
    """
    # Open input file and perform grouping
    try:
        # Iterate over Ig records and order by junction length
        records = {}
        db_iter = readDbFile(db_file)
        for rec in db_iter:
            records[rec.id] = rec
        records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length))
        dist_dict = {}
        for __ in range(len(records)):
            k, v = records.popitem(last=False)
            # Pair each query record with the remaining records
            dist_dict[k] = [v] + list(records.values())
    except:
        alive.value = False
        raise

    # Add groups to data queue
    try:
        # Iterate over groups and feed data queue
        dist_iter = iter(dist_dict.items())
        while alive.value:
            # Get data from queue
            if data_queue.full(): continue
            else: data = next(dist_iter, None)
            # Exit upon reaching end of iterator
            if data is None: break

            # Feed queue
            data_queue.put(DbData(*data))
        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None
    except:
        alive.value = False
        raise

    return None


def processQueue(alive, data_queue, result_queue, clone_func, clone_args):
    """
    Pulls from data queue, performs calculations, and feeds results queue

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    data_queue = a multiprocessing.Queue holding data to process
    result_queue = a multiprocessing.Queue to hold processed results
    clone_func = the function to call for clonal assignment
    clone_args = a dictionary of arguments to pass to clone_func

    Returns:
    None
    """
    try:
        # Iterate over data queue until sentinel object reached
        while alive.value:
            # Get data from queue
            if data_queue.empty(): continue
            else: data = data_queue.get()
            # Exit upon reaching sentinel
            if data is None: break

            # Define result object for iteration and get data records
            records = data.data
            result = DbResult(data.id, records)

            # Check for invalid data (due to failed indexing) and add failed result
            if not data:
                result_queue.put(result)
                continue

            # Add V(D)J to log
            result.log['ID'] = ','.join([str(x) for x in data.id])
            result.log['VALLELE'] = ','.join(set([(r.getVAllele() or '') for r in records]))
            result.log['DALLELE'] = ','.join(set([(r.getDAllele() or '') for r in records]))
            result.log['JALLELE'] = ','.join(set([(r.getJAllele() or '') for r in records]))
            result.log['JUNCLEN'] = ','.join(set([(str(len(r.junction)) or '0') for r in records]))
            result.log['SEQUENCES'] = len(records)

            # Check for preclone failure and assign clones
            clones = clone_func(records, **clone_args) if data else None

            if clones is not None:
                result.results = clones
                result.valid = True
                result.log['CLONES'] = len(clones)
            else:
                result.log['CLONES'] = 0

            # Feed results to result queue
            result_queue.put(result)
        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None
    except:
        alive.value = False
        raise

    return None


def processQueueClust(alive, data_queue, result_queue, clone_func, clone_args):
    """
    Pulls from data queue, performs calculations, and feeds results queue

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    data_queue = a multiprocessing.Queue holding data to process
    result_queue = a multiprocessing.Queue to hold processed results
    clone_func = the function to call for calculating pairwise distances between sequences
    clone_args = a dictionary of arguments to pass to clone_func

    Returns:
    None
    """
    try:
        # Iterate over data queue until sentinel object reached
        while alive.value:
            # Get data from queue
            if data_queue.empty(): continue
            else: data = data_queue.get()
            # Exit upon reaching sentinel
            if data is None: break

            # Define result object for iteration and get data records
            records = data.data
            result = DbResult(data.id, records)

            # Create row of distance matrix and check for error
            dist_row = clone_func(records, **clone_args) if data else None
            if dist_row is not None:
                result.results = dist_row
                result.valid = True

            # Feed results to result queue
            result_queue.put(result)
        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None
    except:
        alive.value = False
        raise

    return None


def collectQueue(alive, result_queue, collect_queue, db_file, out_args, cluster_func=None, cluster_args={}):
    """
    Assembles results from a queue of individual sequence results and manages log/file I/O

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    result_queue = a multiprocessing.Queue holding processQueue results
    collect_queue = a multiprocessing.Queue to store collector return values
    db_file = the input database file name
    out_args = common output argument dictionary from parseCommonArgs
    cluster_func = the function to call for carrying out clustering on the distance matrix
    cluster_args = a dictionary of arguments to pass to cluster_func

    Returns:
    None
    (adds 'log' and 'out_files' to collect_dict)
    """
    # Open output files
    try:
        # Count records and define output format
        out_type = getFileType(db_file) if out_args['out_type'] is None \
                   else out_args['out_type']
        result_count = countDbFile(db_file)

        # Define successful output handle
        pass_handle = getOutputHandle(db_file,
                                      out_label='clone-pass',
                                      out_dir=out_args['out_dir'],
                                      out_name=out_args['out_name'],
                                      out_type=out_type)
        pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE')

        # Define failed alignment output handle
        if out_args['failed']:
            fail_handle = getOutputHandle(db_file,
                                          out_label='clone-fail',
                                          out_dir=out_args['out_dir'],
                                          out_name=out_args['out_name'],
                                          out_type=out_type)
            fail_writer = getDbWriter(fail_handle, db_file)
        else:
            fail_handle = None
            fail_writer = None

        # Define log handle
        if out_args['log_file'] is None:
            log_handle = None
        else:
            log_handle = open(out_args['log_file'], 'w')
    except:
        alive.value = False
        raise

    # Get results from queue and write to files
    try:
        # Iterate over results queue until sentinel object reached
        start_time = time()
        rec_count = clone_count = pass_count = fail_count = 0
        while alive.value:
            # Get result from queue
            if result_queue.empty(): continue
            else: result = result_queue.get()
            # Exit upon reaching sentinel
            if result is None: break

            # Print progress for previous iteration and update record count
            if rec_count == 0:
                print('PROGRESS> Assigning clones')
            printProgress(rec_count, result_count, 0.05, start_time)
            rec_count += len(result.data)

            # Write passed and failed records
            if result:
                for clone in result.results.values():
                    clone_count += 1
                    for i, rec in enumerate(clone):
                        rec.annotations['CLONE'] = clone_count
                        pass_writer.writerow(rec.toDict())
                        pass_count += 1
                        result.log['CLONE%i-%i' % (clone_count, i + 1)] = str(rec.junction)

            else:
                for i, rec in enumerate(result.data):
                    if fail_writer is not None: fail_writer.writerow(rec.toDict())
                    fail_count += 1
                    result.log['CLONE0-%i' % (i + 1)] = str(rec.junction)

            # Write log
            printLog(result.log, handle=log_handle)
        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None

        # Print total counts
        printProgress(rec_count, result_count, 0.05, start_time)

        # Close file handles
        pass_handle.close()
        if fail_handle is not None: fail_handle.close()
        if log_handle is not None: log_handle.close()

        # Update return values
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['CLONES'] = clone_count
        log['RECORDS'] = rec_count
        log['PASS'] = pass_count
        log['FAIL'] = fail_count
        collect_dict = {'log': log, 'out_files': [pass_handle.name]}
        collect_queue.put(collect_dict)
    except:
        alive.value = False
        raise

    return None


def collectQueueClust(alive, result_queue, collect_queue, db_file, out_args, cluster_func, cluster_args):
    """
    Assembles results from a queue of individual sequence results and manages log/file I/O

    Arguments:
    alive = a multiprocessing.Value boolean controlling whether processing continues
            if False exit process
    result_queue = a multiprocessing.Queue holding processQueue results
    collect_queue = a multiprocessing.Queue to store collector return values
    db_file = the input database file name
    out_args = common output argument dictionary from parseCommonArgs
    cluster_func = the function to call for carrying out clustering on the distance matrix
    cluster_args = a dictionary of arguments to pass to cluster_func

    Returns:
    None
    (adds 'log' and 'out_files' to collect_dict)
    """
    # Open output files
    try:
        # Iterate over Ig records to count and order by junction length
        result_count = 0
        records = {}
        db_iter = readDbFile(db_file)
        for rec in db_iter:
            records[rec.id] = rec
            result_count += 1
        records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length))

        # Define empty matrix to store assembled results
        dist_mat = np.zeros((result_count, result_count))

        # Count records and define output format
        out_type = getFileType(db_file) if out_args['out_type'] is None \
                   else out_args['out_type']

        # Define successful output handle
        pass_handle = getOutputHandle(db_file,
                                      out_label='clone-pass',
                                      out_dir=out_args['out_dir'],
                                      out_name=out_args['out_name'],
                                      out_type=out_type)
        pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE')

        # Define failed cloning output handle
        if out_args['failed']:
            fail_handle = getOutputHandle(db_file,
                                          out_label='clone-fail',
                                          out_dir=out_args['out_dir'],
                                          out_name=out_args['out_name'],
                                          out_type=out_type)
            fail_writer = getDbWriter(fail_handle, db_file)
        else:
            fail_handle = None
            fail_writer = None

        # Open log file
        if out_args['log_file'] is None:
            log_handle = None
        else:
            log_handle = open(out_args['log_file'], 'w')
    except:
        alive.value = False
        raise

    try:
        # Iterate over results queue until sentinel object reached
        start_time = time()
        row_count = rec_count = 0
        while alive.value:
            # Get result from queue
            if result_queue.empty(): continue
            else: result = result_queue.get()
            # Exit upon reaching sentinel
            if result is None: break

            # Print progress for previous iteration
            if row_count == 0:
                print('PROGRESS> Assigning clones')
            printProgress(row_count, result_count, 0.05, start_time)

            # Update counts for iteration
            row_count += 1
            rec_count += len(result)

            # Add result row to distance matrix
            if result:
                dist_mat[list(range(result_count - len(result), result_count)), result_count - len(result)] = result.results

        else:
            sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \
                             % os.getpid())
            return None

        # Calculate linkage and carry out clustering
        clusters = cluster_func(dist_mat, **cluster_args) if dist_mat is not None else None
        clones = {}
        for i, c in enumerate(clusters):
            clones.setdefault(c, []).append(records[list(records.keys())[i]])

        # Write passed and failed records
        clone_count = pass_count = fail_count = 0
        if clones:
            for clone in clones.values():
                clone_count += 1
                for i, rec in enumerate(clone):
                    rec.annotations['CLONE'] = clone_count
                    pass_writer.writerow(rec.toDict())
                    pass_count += 1
        else:
            for i, rec in enumerate(result.data):
                if fail_writer is not None: fail_writer.writerow(rec.toDict())
                fail_count += 1

        # Print final progress
        printProgress(row_count, result_count, 0.05, start_time)

        # Close file handles
        pass_handle.close()
        if fail_handle is not None: fail_handle.close()
        if log_handle is not None: log_handle.close()

        # Update return values
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['CLONES'] = clone_count
        log['RECORDS'] = rec_count
        log['PASS'] = pass_count
        log['FAIL'] = fail_count
        collect_dict = {'log': log, 'out_files': [pass_handle.name]}
        collect_queue.put(collect_dict)
    except:
        alive.value = False
        raise

    return None


def defineClones(db_file, feed_func, work_func, collect_func, clone_func, cluster_func=None,
                 group_func=None, group_args={}, clone_args={}, cluster_args={},
                 out_args=default_out_args, nproc=None, queue_size=None):
    """
    Define clonally related sequences

    Arguments:
    db_file = filename of input database
    feed_func = the function that feeds the queue
    work_func = the worker function that will run on each CPU
    collect_func = the function that collects results from the workers
    group_func = the function to use for assigning preclones
    clone_func = the function to use for determining clones within preclonal groups
    cluster_func = the function to use for clustering the assembled distance matrix
    group_args = a dictionary of arguments to pass to group_func
    clone_args = a dictionary of arguments to pass to clone_func
    cluster_args = a dictionary of arguments to pass to cluster_func
    out_args = common output argument dictionary from parseCommonArgs
    nproc = the number of processQueue processes;
            if None defaults to the number of CPUs
    queue_size = maximum size of the argument queue;
                 if None defaults to 2*nproc

    Returns:
    a list of successful output file names
    """
    # Print parameter info
    log = OrderedDict()
    log['START'] = 'DefineClones'
    log['DB_FILE'] = os.path.basename(db_file)
    if group_func is not None:
        log['GROUP_FUNC'] = group_func.__name__
        log['GROUP_ARGS'] = group_args
    log['CLONE_FUNC'] = clone_func.__name__

    # TODO: this is yucky, but can be fixed by using a model class
    clone_log = clone_args.copy()
    if 'dist_mat' in clone_log: del clone_log['dist_mat']
    log['CLONE_ARGS'] = clone_log

    if cluster_func is not None:
        log['CLUSTER_FUNC'] = cluster_func.__name__
        log['CLUSTER_ARGS'] = cluster_args
    log['NPROC'] = nproc
    printLog(log)

    # Define feeder function and arguments
    feed_args = {'db_file': db_file,
                 'group_func': group_func,
                 'group_args': group_args}
    # Define worker function and arguments
    work_args = {'clone_func': clone_func,
                 'clone_args': clone_args}
    # Define collector function and arguments
    collect_args = {'db_file': db_file,
                    'out_args': out_args,
                    'cluster_func': cluster_func,
                    'cluster_args': cluster_args}

    # Call process manager
    result = manageProcesses(feed_func, work_func, collect_func,
                             feed_args, work_args, collect_args,
                             nproc, queue_size)

    # Print log
    result['log']['END'] = 'DefineClones'
    printLog(result['log'])

    return result['out_files']


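# Illustrative usage for the bygroup pipeline (file name and threshold are assumptions):
#
#     defineClones('db-pass.tab',
#                  feed_func=feedQueue, work_func=processQueue,
#                  collect_func=collectQueue, group_func=indexJunctions,
#                  clone_func=distanceClones,
#                  group_args={'mode': 'gene', 'action': 'first'},
#                  clone_args={'model': 'ham', 'distance': 0.15},
#                  nproc=2)

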
def getArgParser():
    """
    Defines the ArgumentParser

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output fields
    fields = dedent(
             '''
             output files:
                 clone-pass
                     database with assigned clonal group numbers.
                 clone-fail
                     database with records failing clonal grouping.

             required fields:
                 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION

                 <field>
                     sequence field specified by the --sf parameter

             output fields:
                 CLONE
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Cloning method')
    # TODO: This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       multiproc=True)

    # Distance cloning method
    parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent],
                                           formatter_class=CommonHelpFormatter,
                                           help='''Defines clones as having same V assignment,
                                                J assignment, and junction length with
                                                specified substitution distance model.''',
                                           description='''Defines clones as having same V assignment,
                                                        J assignment, and junction length with
                                                        specified substitution distance model.''')
    parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None,
                                help='Additional fields to use for grouping clones (non VDJ)')
    parser_bygroup.add_argument('--mode', action='store', dest='mode',
                                choices=('allele', 'gene'), default=default_index_mode,
                                help='''Specifies whether to use the V(D)J allele or gene for
                                     initial grouping.''')
    parser_bygroup.add_argument('--act', action='store', dest='action',
                                choices=('first', 'set'), default=default_index_action,
                                help='''Specifies how to handle multiple V(D)J assignments
                                     for initial grouping.''')
    parser_bygroup.add_argument('--model', action='store', dest='model',
                                choices=choices_bygroup_model,
                                default=default_bygroup_model,
                                help='''Specifies which substitution model to use for calculating distance
                                     between sequences. The "ham" model is nucleotide Hamming distance and
                                     "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are
                                     human specific single nucleotide and 5-mer content models, respectively,
                                     from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are
                                     mouse specific single nucleotide and 5-mer content models, respectively,
                                     from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are
                                     deprecated models provided for backwards compatibility with the "m1n" and
                                     "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both
                                     5-mer models should be considered experimental.''')
    parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float,
                                default=default_distance,
                                help='The distance threshold for clonal grouping')
    parser_bygroup.add_argument('--norm', action='store', dest='norm',
                                choices=('len', 'mut', 'none'), default=default_norm,
                                help='''Specifies how to normalize distances. One of none
                                     (do not normalize), len (normalize by length),
                                     or mut (normalize by number of mutations between sequences).''')
    parser_bygroup.add_argument('--sym', action='store', dest='sym',
                                choices=('avg', 'min'), default=default_sym,
                                help='''Specifies how to combine asymmetric distances. One of avg
                                     (average of A->B and B->A) or min (minimum of A->B and B->A).''')
    parser_bygroup.add_argument('--link', action='store', dest='linkage',
                                choices=('single', 'average', 'complete'), default=default_linkage,
                                help='''Type of linkage to use for hierarchical clustering.''')
    parser_bygroup.add_argument('--sf', action='store', dest='seq_field',
                                default=default_seq_field,
                                help='''The name of the field to be used to calculate
                                     distance between records''')
    parser_bygroup.set_defaults(feed_func=feedQueue)
    parser_bygroup.set_defaults(work_func=processQueue)
    parser_bygroup.set_defaults(collect_func=collectQueue)
    parser_bygroup.set_defaults(group_func=indexJunctions)
    parser_bygroup.set_defaults(clone_func=distanceClones)

    # Chen2010
    parser_chen = subparsers.add_parser('chen2010', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='''Defines clones by method specified in Chen, 2010.''',
                                        description='''Defines clones by method specified in Chen, 2010.''')
    parser_chen.set_defaults(feed_func=feedQueueClust)
    parser_chen.set_defaults(work_func=processQueueClust)
    parser_chen.set_defaults(collect_func=collectQueueClust)
    parser_chen.set_defaults(cluster_func=hierClust)

    # Ademokun2011
    parser_ade = subparsers.add_parser('ademokun2011', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='''Defines clones by method specified in Ademokun, 2011.''',
                                       description='''Defines clones by method specified in Ademokun, 2011.''')
    parser_ade.set_defaults(feed_func=feedQueueClust)
    parser_ade.set_defaults(work_func=processQueueClust)
    parser_ade.set_defaults(collect_func=collectQueueClust)
    parser_ade.set_defaults(cluster_func=hierClust)

    return parser


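# Illustrative command line (file name and threshold are assumptions; -d and --nproc
# come from the common argument parser):
#
#     DefineClones.py bygroup -d db-pass.tab --model ham --dist 0.15 \
#         --mode gene --act set --norm len --nproc 4

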
if __name__ == '__main__':
    """
    Parses command line arguments and calls main function
    """
    # Parse arguments
    parser = getArgParser()
    args = parser.parse_args()
    args_dict = parseCommonArgs(args)
    # Convert case of fields
    if 'seq_field' in args_dict:
        args_dict['seq_field'] = args_dict['seq_field'].upper()
    if 'fields' in args_dict and args_dict['fields'] is not None:
        args_dict['fields'] = [f.upper() for f in args_dict['fields']]

    # Define clone_args
    if args.command == 'bygroup':
        args_dict['group_args'] = {'fields': args_dict['fields'],
                                   'action': args_dict['action'],
                                   'mode': args_dict['mode']}
        args_dict['clone_args'] = {'model': args_dict['model'],
                                   'distance': args_dict['distance'],
                                   'norm': args_dict['norm'],
                                   'sym': args_dict['sym'],
                                   'linkage': args_dict['linkage'],
                                   'seq_field': args_dict['seq_field']}

        # Get distance matrix
        try:
            args_dict['clone_args']['dist_mat'] = distance_models[args_dict['model']]
        except KeyError:
            sys.exit('Unrecognized distance model: %s' % args_dict['model'])

        del args_dict['fields']
        del args_dict['action']
        del args_dict['mode']
        del args_dict['model']
        del args_dict['distance']
        del args_dict['norm']
        del args_dict['sym']
        del args_dict['linkage']
        del args_dict['seq_field']

    # Define cluster_args
    if args.command == 'chen2010':
        args_dict['clone_func'] = distChen2010
        args_dict['cluster_args'] = {'method': args.command}

    if args.command == 'ademokun2011':
        args_dict['clone_func'] = distAdemokun2011
        args_dict['cluster_args'] = {'method': args.command}

    # Call defineClones for each database file
    del args_dict['command']
    del args_dict['db_files']
    for f in args.__dict__['db_files']:
        args_dict['db_file'] = f
        defineClones(**args_dict)