#!/usr/bin/env python3
"""
Parses tab-delimited database files
"""
# Info
__author__ = 'Jason Anthony Vander Heiden'
from changeo import __version__, __date__

# Imports
import csv
import os
import re
from argparse import ArgumentParser
from collections import OrderedDict

from textwrap import dedent
from time import time
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# Presto and changeo imports
from presto.Defaults import default_delimiter, default_out_args
from presto.Annotation import flattenAnnotation
from presto.IO import getOutputHandle, printLog, printProgress, printMessage
from changeo.Defaults import default_csv_size
from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
from changeo.IO import getDbWriter, readDbFile, countDbFile

# System settings
csv.field_size_limit(default_csv_size)

# Defaults
default_id_field = 'SEQUENCE_ID'
default_seq_field = 'SEQUENCE_IMGT'
default_germ_field = 'GERMLINE_IMGT_D_MASK'
default_index_field = 'INDEX'

# TODO: convert SQL-ish operations to modify_func() as per ParseHeaders


def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None,
                   delimiter=default_delimiter):
    """
    Parses a database record into a SeqRecord

    Arguments:
    db_record = a dictionary containing a database record
    id_field = the field containing identifiers
    seq_field = the field containing sequences
    meta_fields = a list of fields to add to sequence annotations
    delimiter = a tuple of delimiters for (fields, values, value lists)

    Returns:
    a SeqRecord
    """
    # Return None if ID or sequence fields are empty
    if not db_record[id_field] or not db_record[seq_field]:
        return None

    # Create description string
    desc_dict = OrderedDict([('ID', db_record[id_field])])
    if meta_fields is not None:
        desc_dict.update([(f, db_record[f]) for f in meta_fields if f in db_record])
    desc_str = flattenAnnotation(desc_dict, delimiter=delimiter)

    # Create SeqRecord
    seq_record = SeqRecord(Seq(db_record[seq_field], IUPAC.ambiguous_dna),
                           id=desc_str, name=desc_str, description='')

    return seq_record
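
# Example (illustrative sketch, not part of the original module): building a
# SeqRecord from a record dictionary. The record values below are invented for
# demonstration; with the default presto delimiters any meta fields are folded
# into the sequence identifier.
#
#   rec = {'SEQUENCE_ID': 'read1', 'SEQUENCE_IMGT': 'NNACGTACGT', 'CREGION': 'IGHM'}
#   seq = getDbSeqRecord(rec, 'SEQUENCE_ID', 'SEQUENCE_IMGT', meta_fields=['CREGION'])
#   SeqIO.write(seq, 'example.fasta', 'fasta')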


def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
    """
    Divides a tab-delimited database file into segments by description tags

    Arguments:
    db_file = filename of the tab-delimited database file to split
    field = the field name by which to split db_file
    num_split = the numerical threshold by which to group sequences;
                if None treat field as textual
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    a list of output file names
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'split'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUM_SPLIT'] = num_split
    printLog(log)

    # Open IgRecord reader iter object
    reader = readDbFile(db_file, ig=False)

    # Determine total number of records
    rec_count = countDbFile(db_file)

    start_time = time()
    count = 0
    # Sort records into files based on textual field
    if num_split is None:
        # Create set of unique field tags
        tmp_iter = readDbFile(db_file, ig=False)
        tag_list = list(set([row[field] for row in tmp_iter]))

        # Forbidden characters in filenames and their replacements.
        # Note: str.replace does literal matching, so the keys must be the
        # plain characters, not regex-style escapes such as '\/' or '\%'.
        noGood = {'/': 'f', '\\': 'b', '?': 'q', '%': 'p', '*': 's', ':': 'c',
                  '|': 'pi', '"': 'dq', "'": 'sq', '<': 'gt', '>': 'lt', ' ': '_'}
        # Replace forbidden characters in tag_list
        tag_dict = {}
        for tag in tag_list:
            for c, r in noGood.items():
                tag_dict[tag] = (tag_dict.get(tag, tag).replace(c, r)
                                 if c in tag else tag_dict.get(tag, tag))

        # Create output handles
        handles_dict = {tag: getOutputHandle(db_file,
                                             '%s-%s' % (field, label),
                                             out_type=out_args['out_type'],
                                             out_name=out_args['out_name'],
                                             out_dir=out_args['out_dir'])
                        for tag, label in tag_dict.items()}

        # Create Db writer instances
        writers_dict = {tag: getDbWriter(handles_dict[tag], db_file)
                        for tag in tag_dict}

        # Iterate over IgRecords
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            # Write row to appropriate file
            tag = row[field]
            writers_dict[tag].writerow(row)

    # Sort records into files based on numeric num_split
    else:
        num_split = float(num_split)

        # Create output handles
        handles_dict = {'under': getOutputHandle(db_file,
                                                 'under-%.1f' % num_split,
                                                 out_type=out_args['out_type'],
                                                 out_name=out_args['out_name'],
                                                 out_dir=out_args['out_dir']),
                        'atleast': getOutputHandle(db_file,
                                                   'atleast-%.1f' % num_split,
                                                   out_type=out_args['out_type'],
                                                   out_name=out_args['out_name'],
                                                   out_dir=out_args['out_dir'])}

        # Create Db writer instances
        writers_dict = {'under': getDbWriter(handles_dict['under'], db_file),
                        'atleast': getDbWriter(handles_dict['atleast'], db_file)}

        # Iterate over IgRecords
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            tag = row[field]
            tag = 'under' if float(tag) < num_split else 'atleast'
            writers_dict[tag].writerow(row)

    # Write log
    printProgress(count, rec_count, 0.05, start_time)
    log = OrderedDict()
    for i, k in enumerate(handles_dict):
        log['OUTPUT%i' % (i + 1)] = os.path.basename(handles_dict[k].name)
    log['RECORDS'] = rec_count
    log['PARTS'] = len(handles_dict)
    log['END'] = 'ParseDb'
    printLog(log)

    # Close output file handles
    for t in handles_dict:
        handles_dict[t].close()

    return [handles_dict[t].name for t in handles_dict]
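
# Example (illustrative; 'db.tab' and the field names are placeholders):
# splitting by a text field writes one file per unique value, while passing
# num_split partitions records into 'under' and 'atleast' files by a numeric
# threshold.
#
#   splitDbFile('db.tab', field='PRCONS')
#   splitDbFile('db.tab', field='DUPCOUNT', num_split=2)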


# TODO: SHOULD ALLOW FOR UNSORTED CLUSTER COLUMN
# TODO: SHOULD ALLOW FOR GROUPING FIELDS
def convertDbBaseline(db_file, id_field=default_id_field, seq_field=default_seq_field,
                      germ_field=default_germ_field, cluster_field=None,
                      meta_fields=None, out_args=default_out_args):
    """
    Builds a BASELINe fasta file from database records

    Arguments:
    db_file = the database file name
    id_field = the field containing identifiers
    seq_field = the field containing sample sequences
    germ_field = the field containing germline sequences
    cluster_field = the field containing clonal groupings;
                    if None write the germline for each record
    meta_fields = a list of fields to add to sequence annotations
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'baseline'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    log['GERM_FIELD'] = germ_field
    log['CLUSTER_FIELD'] = cluster_field
    if meta_fields is not None:  log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='clip')
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = germ_count = pass_count = fail_count = 0
    cluster_last = None
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update cluster ID
        cluster = rec.get(cluster_field, None)

        # Get germline SeqRecord when needed
        if cluster_field is None:
            germ = getDbSeqRecord(rec, id_field, germ_field, meta_fields,
                                  delimiter=out_args['delimiter'])
            germ.id = '>' + germ.id
        elif cluster != cluster_last:
            germ = getDbSeqRecord(rec, cluster_field, germ_field,
                                  delimiter=out_args['delimiter'])
            germ.id = '>' + germ.id
        else:
            germ = None

        # Get read SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields,
                             delimiter=out_args['delimiter'])

        # Write germline
        if germ is not None:
            germ_count += 1
            SeqIO.write(germ, pass_handle, 'fasta')

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, 'fasta')
        else:
            fail_count += 1

        # Set last cluster ID
        cluster_last = cluster

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['GERMLINES'] = germ_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
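
# Example (illustrative; 'db.tab' and 'CLONE' are placeholders): records must
# already be sorted by the clone column so each germline is emitted once per
# clonal group, prefixed with ">>" in the output.
#
#   convertDbBaseline('db.tab', cluster_field='CLONE')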


def convertDbFasta(db_file, id_field=default_id_field, seq_field=default_seq_field,
                   meta_fields=None, out_args=default_out_args):
    """
    Builds fasta files from database records

    Arguments:
    db_file = the database file name
    id_field = the field containing identifiers
    seq_field = the field containing sequences
    meta_fields = a list of fields to add to sequence annotations
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'fasta'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    if meta_fields is not None:  log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    out_type = 'fasta'
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type=out_type)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Get SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields, out_args['delimiter'])

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, out_type)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
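
# Example (illustrative; 'db.tab' and the field names are placeholders):
# write junction sequences annotated with their V gene calls.
#
#   convertDbFasta('db.tab', seq_field='JUNCTION', meta_fields=['V_CALL'])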


def addDbFile(db_file, fields, values, out_args=default_out_args):
    """
    Adds field and value pairs to a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to add
    values = a list of values to assign to all rows of each field
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'add'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-add', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Define fields and values to append
    add_dict = {k: v for k, v in zip(fields, values) if k not in db_iter.fieldnames}

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write updated row
        rec.update(add_dict)
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
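
# Example (illustrative; file and field names are placeholders): append a
# constant SAMPLE column to every row.
#
#   addDbFile('db.tab', fields=['SAMPLE'], values=['S1'])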


def indexDbFile(db_file, field=default_index_field, out_args=default_out_args):
    """
    Adds an index column to a database file

    Arguments:
    db_file = the database file name
    field = the name of the index field to add
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'index'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-index', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=field)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Add count and write updated row
        rec.update({field: rec_count})
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
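
# Example (illustrative; 'db.tab' is a placeholder): number rows 1..N in a new
# INDEX column using the default field name.
#
#   indexDbFile('db.tab')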


def dropDbFile(db_file, fields, out_args=default_out_args):
    """
    Deletes entire fields from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to drop
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'drop'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-drop', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, exclude_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write row
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
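
# Example (illustrative; file and field names are placeholders): remove bulky
# alignment columns before sharing a file.
#
#   dropDbFile('db.tab', fields=['SEQUENCE_INPUT', 'SEQUENCE_IMGT'])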


def deleteDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Deletes records from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to check for deletion criteria
    values = a list of values defining deletion targets
    logic = one of 'any' or 'all' defining whether one or all fields must have a match.
    regex = if False do exact full string matches; if True allow partial regex matches.
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns):  return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns):  return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'delete'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-delete', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for deletion values in all fields
        delete = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if not delete:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['KEPT'] = pass_count
    log['DELETED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
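
# Example (illustrative; file and field names are placeholders): drop
# non-functional records; with regex=True the values are treated as regular
# expressions and partial matches count.
#
#   deleteDbFile('db.tab', fields=['FUNCTIONAL'], values=['F'])
#   deleteDbFile('db.tab', fields=['V_CALL'], values=['IGHV4'], regex=True)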


def renameDbFile(db_file, fields, names, out_args=default_out_args):
    """
    Renames fields in a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to rename
    names = a list of new names for fields
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'rename'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['NAMES'] = ','.join(names)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-rename', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')

    # Get header and rename fields
    header = readDbFile(db_file, ig=False).fieldnames
    for f, n in zip(fields, names):
        i = header.index(f)
        header[i] = n

    # Open writer and write new header
    # TODO: should modify getDbWriter to take a list of fields
    pass_writer = csv.DictWriter(pass_handle, fieldnames=header, dialect='excel-tab')
    pass_writer.writeheader()

    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # TODO: repeated renaming is unnecessary; should add a non-dict reader/writer to DbCore
        # Rename fields
        for f, n in zip(fields, names):
            rec[n] = rec.pop(f)
        # Write
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
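
# Example (illustrative; file and field names are placeholders): rename a
# column in the output file while preserving its position in the header.
#
#   renameDbFile('db.tab', fields=['PRCONS'], names=['CREGION'])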


def selectDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Selects records from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to check for selection criteria
    values = a list of values defining selection targets
    logic = one of 'any' or 'all' defining whether one or all fields must have a match.
    regex = if False do exact full string matches; if True allow partial regex matches.
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns):  return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns):  return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    # Print console log
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'select'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    log['REGEX'] = regex
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-select', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for selection values in all fields
        select = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if select:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['SELECTED'] = pass_count
    log['DISCARDED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
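
# Example (illustrative; file and field names are placeholders): keep only
# heavy chain records, requiring both gene call fields to match the pattern.
#
#   selectDbFile('db.tab', fields=['V_CALL', 'J_CALL'], values=['IGH'],
#                logic='all', regex=True)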


def sortDbFile(db_file, field, numeric=False, descend=False,
               out_args=default_out_args):
    """
    Sorts records by values in an annotation field

    Arguments:
    db_file = the database filename
    field = the field name to sort by
    numeric = if True sort field numerically;
              if False sort field alphabetically
    descend = if True sort in descending order;
              if False sort in ascending order
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'sort'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUMERIC'] = numeric
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-sort', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)

    # Store all records in a dictionary
    start_time = time()
    printMessage("Indexing: Running", start_time=start_time)
    db_dict = {i: r for i, r in enumerate(db_iter)}
    result_count = len(db_dict)

    # Sort db_dict by field values
    tag_dict = {k: v[field] for k, v in db_dict.items()}
    if numeric:  tag_dict = {k: float(v or 0) for k, v in tag_dict.items()}
    sorted_keys = sorted(tag_dict, key=tag_dict.get, reverse=descend)
    printMessage("Indexing: Done", start_time=start_time, end=True)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for key in sorted_keys:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Write records
        pass_writer.writerow(db_dict[key])

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
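
# Example (illustrative; file and field names are placeholders): sort by copy
# number, highest first. Note all records are held in memory during the sort,
# so this suits moderately sized files.
#
#   sortDbFile('db.tab', field='DUPCOUNT', numeric=True, descend=True)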


def updateDbFile(db_file, field, values, updates, out_args=default_out_args):
    """
    Updates field and value pairs in a database file

    Arguments:
    db_file = the database file name
    field = the field to update
    values = a list of values specifying which rows to update
    updates = a list of values to update each value with
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'update'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['VALUES'] = ','.join(values)
    log['UPDATES'] = ','.join(updates)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-update', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update values if found
        for x, y in zip(values, updates):
            if rec[field] == x:
                rec[field] = y
                pass_count += 1

        # Write records
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['UPDATED'] = pass_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
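
# Example (illustrative; file and field names are placeholders): recode sample
# labels, mapping each old value to its replacement pairwise.
#
#   updateDbFile('db.tab', field='SAMPLE', values=['S1', 'S2'],
#                updates=['donor1', 'donor2'])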


def getArgParser():
    """
    Defines the ArgumentParser

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output field help message
    fields = dedent(
             '''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and baseline.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 SEQUENCE_ID

             optional fields:
                 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
                 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
                 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION

             output fields:
                 None
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Database operation')
    # TODO: This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Define parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       failed=False, log=False)

    # Subparser to convert database entries to sequence file
    parser_seq = subparsers.add_parser('fasta', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Creates a fasta file from database records.',
                                       description='Creates a fasta file from database records.')
    parser_seq.add_argument('--if', action='store', dest='id_field',
                            default=default_id_field,
                            help='The name of the field containing identifiers')
    parser_seq.add_argument('--sf', action='store', dest='seq_field',
                            default=default_seq_field,
                            help='The name of the field containing sequences')
    parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                            help='List of annotation fields to add to the sequence description')
    parser_seq.set_defaults(func=convertDbFasta)

    # Subparser to convert database entries to a BASELINe fasta file
    parser_baseln = subparsers.add_parser('baseline', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          description='Creates a BASELINe fasta file from database records.',
                                          help='''Creates a specially formatted fasta file
                                               from database records for input into the BASELINe
                                               website. The format groups clonally related sequences
                                               sequentially, with the germline sequence preceding
                                               each clone and denoted by headers starting with ">>".''')
    parser_baseln.add_argument('--if', action='store', dest='id_field',
                               default=default_id_field,
                               help='The name of the field containing identifiers')
    parser_baseln.add_argument('--sf', action='store', dest='seq_field',
                               default=default_seq_field,
                               help='The name of the field containing reads')
    parser_baseln.add_argument('--gf', action='store', dest='germ_field',
                               default=default_germ_field,
                               help='The name of the field containing germline sequences')
    parser_baseln.add_argument('--cf', action='store', dest='cluster_field', default=None,
                               help='The name of the field containing sorted clone IDs')
    parser_baseln.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                               help='List of annotation fields to add to the sequence description')
    parser_baseln.set_defaults(func=convertDbBaseline)

    # Subparser to partition files by annotation values
    parser_split = subparsers.add_parser('split', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Splits database files by field values.',
                                         description='Splits database files by field values.')
    parser_split.add_argument('-f', action='store', dest='field', type=str, required=True,
                              help='Annotation field by which to split database files.')
    parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None,
                              help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.''')
    parser_split.set_defaults(func=splitDbFile)

    # Subparser to add fields
    parser_add = subparsers.add_parser('add', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Adds field and value pairs.',
                                       description='Adds field and value pairs.')
    parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                            help='The name of the fields to add.')
    parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                            help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser('delete', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Deletes specific records.',
                                          description='Deletes specific records.')
    parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for deletion criteria.')
    parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'],
                               help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.''')
    parser_delete.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_delete.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Deletes entire fields.',
                                        description='Deletes entire fields.')
    parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                             help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to index fields
    parser_index = subparsers.add_parser('index', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Adds a numeric index field.',
                                         description='Adds a numeric index field.')
    parser_index.add_argument('-f', action='store', dest='field',
                              default=default_index_field,
                              help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Renames fields.',
                                          description='Renames fields.')
    parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='List of fields to rename.')
    parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True,
                               help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records
    parser_select = subparsers.add_parser('select', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Selects specific records.',
                                          description='Selects specific records.')
    parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for selection criteria.')
    parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.''')
    parser_select.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_select.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort file by records
    parser_sort = subparsers.add_parser('sort', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Sorts records by field values.',
                                        description='Sorts records by field values.')
    parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True,
                             help='The annotation field by which to sort records.')
    parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False,
                             help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    parser_sort.add_argument('--descend', action='store_true', dest='descend',
                             help='''If specified, sort records in descending, rather
                                  than ascending, order by values in the target field.''')
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update records
    parser_update = subparsers.add_parser('update', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Updates field and value pairs.',
                                          description='Updates field and value pairs.')
    parser_update.add_argument('-f', action='store', dest='field', required=True,
                               help='The name of the field to update.')
    parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='The values that will be replaced.')
    parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True,
                               help='The new value to assign to each selected row.')
    parser_update.set_defaults(func=updateDbFile)

    return parser
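
# Example command lines (illustrative; 'db.tab' is a placeholder and the -d
# database file option is assumed to come from getCommonArgParser):
#
#   ParseDb.py fasta -d db.tab --if SEQUENCE_ID --sf JUNCTION --mf V_CALL
#   ParseDb.py split -d db.tab -f PRCONS
#   ParseDb.py sort -d db.tab -f DUPCOUNT --num --descend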


if __name__ == '__main__':
    """
    Parses command line arguments and calls main function
    """
    # Parse arguments
    parser = getArgParser()
    checkArgs(parser)
    args = parser.parse_args()
    args_dict = parseCommonArgs(args)
    # Convert case of fields
    if 'id_field' in args_dict:
        args_dict['id_field'] = args_dict['id_field'].upper()
    if 'seq_field' in args_dict:
        args_dict['seq_field'] = args_dict['seq_field'].upper()
    if 'germ_field' in args_dict:
        args_dict['germ_field'] = args_dict['germ_field'].upper()
    if 'field' in args_dict:
        args_dict['field'] = args_dict['field'].upper()
    if 'cluster_field' in args_dict and args_dict['cluster_field'] is not None:
        args_dict['cluster_field'] = args_dict['cluster_field'].upper()
    if 'meta_fields' in args_dict and args_dict['meta_fields'] is not None:
        args_dict['meta_fields'] = [f.upper() for f in args_dict['meta_fields']]
    if 'fields' in args_dict:
        args_dict['fields'] = [f.upper() for f in args_dict['fields']]

    # Check paired arguments for the add, rename and update subcommands
    if args.command == 'add' and len(args_dict['fields']) != len(args_dict['values']):
        parser.error('You must specify exactly one value (-u) per field (-f)')
    elif args.command == 'rename' and len(args_dict['fields']) != len(args_dict['names']):
        parser.error('You must specify exactly one new name (-k) per field (-f)')
    elif args.command == 'update' and len(args_dict['values']) != len(args_dict['updates']):
        parser.error('You must specify exactly one value (-u) per replacement (-t)')

    # Call parser function for each database file
    del args_dict['command']
    del args_dict['func']
    del args_dict['db_files']
    for f in args.__dict__['db_files']:
        args_dict['db_file'] = f
        args.func(**args_dict)