Mercurial > repos > davidvanzessen > change_o

ParseDb.py @ 0:183edf446dcf (draft default tip)

Uploaded

| author | davidvanzessen |
|---|---|
| date | Mon, 17 Jul 2017 07:44:27 -0400 |
#!/usr/bin/env python3
"""
Parses tab delimited database files
"""
# Info
__author__ = 'Jason Anthony Vander Heiden'
from changeo import __version__, __date__

# Imports
import csv
import os
import re
from argparse import ArgumentParser
from collections import OrderedDict
from textwrap import dedent
from time import time
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# Presto and changeo imports
from presto.Defaults import default_delimiter, default_out_args
from presto.Annotation import flattenAnnotation
from presto.IO import getOutputHandle, printLog, printProgress, printMessage
from changeo.Defaults import default_csv_size
from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
from changeo.IO import getDbWriter, readDbFile, countDbFile

# System settings
csv.field_size_limit(default_csv_size)

# Defaults
default_id_field = 'SEQUENCE_ID'
default_seq_field = 'SEQUENCE_IMGT'
default_germ_field = 'GERMLINE_IMGT_D_MASK'
default_index_field = 'INDEX'

# TODO: convert SQL-ish operations to modify_func() as per ParseHeaders
def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None,
                   delimiter=default_delimiter):
    """
    Parses a database record into a SeqRecord

    Arguments:
      db_record = a dictionary containing a database record
      id_field = the field containing identifiers
      seq_field = the field containing sequences
      meta_fields = a list of fields to add to sequence annotations
      delimiter = a tuple of delimiters for (fields, values, value lists)

    Returns:
      a SeqRecord
    """
    # Return None if ID or sequence fields are empty
    if not db_record[id_field] or not db_record[seq_field]:
        return None

    # Create description string
    desc_dict = OrderedDict([('ID', db_record[id_field])])
    if meta_fields is not None:
        desc_dict.update([(f, db_record[f]) for f in meta_fields if f in db_record])
    desc_str = flattenAnnotation(desc_dict, delimiter=delimiter)

    # Create SeqRecord
    seq_record = SeqRecord(Seq(db_record[seq_field], IUPAC.ambiguous_dna),
                           id=desc_str, name=desc_str, description='')

    return seq_record
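
# Usage sketch (not part of the original script; the record dictionary and
# output file are hypothetical, field names follow the defaults defined above):
#   row = {'SEQUENCE_ID': 'READ_001', 'SEQUENCE_IMGT': 'NNNACGTGGCC'}
#   rec = getDbSeqRecord(row, 'SEQUENCE_ID', 'SEQUENCE_IMGT')
#   if rec is not None:
#       with open('example.fasta', 'w') as handle:
#           SeqIO.write(rec, handle, 'fasta')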
def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
    """
    Divides a tab-delimited database file into segments by description tags

    Arguments:
      db_file = filename of the tab-delimited database file to split
      field = the field name by which to split db_file
      num_split = the numerical threshold by which to group sequences;
                  if None treat field as textual
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      a list of output file names
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'split'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUM_SPLIT'] = num_split
    printLog(log)

    # Open IgRecord reader iter object
    reader = readDbFile(db_file, ig=False)

    # Determine total number of records
    rec_count = countDbFile(db_file)

    start_time = time()
    count = 0
    # Sort records into files based on textual field
    if num_split is None:
        # Create set of unique field tags
        tmp_iter = readDbFile(db_file, ig=False)
        tag_list = list(set([row[field] for row in tmp_iter]))

        # Forbidden characters in filenames and their replacements
        noGood = {'/': 'f', '\\': 'b', '?': 'q', '%': 'p', '*': 's', ':': 'c',
                  '|': 'pi', '"': 'dq', '\'': 'sq', '<': 'lt', '>': 'gt', ' ': '_'}
        # Replace forbidden characters in tag_list
        tag_dict = {}
        for tag in tag_list:
            label = tag
            for c, r in noGood.items():
                label = label.replace(c, r)
            tag_dict[tag] = label

        # Create output handles
        handles_dict = {tag: getOutputHandle(db_file,
                                             '%s-%s' % (field, label),
                                             out_type=out_args['out_type'],
                                             out_name=out_args['out_name'],
                                             out_dir=out_args['out_dir'])
                        for tag, label in tag_dict.items()}

        # Create Db writer instances
        writers_dict = {tag: getDbWriter(handles_dict[tag], db_file)
                        for tag in tag_dict}

        # Iterate over IgRecords
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            # Write row to appropriate file
            tag = row[field]
            writers_dict[tag].writerow(row)

    # Sort records into files based on numeric num_split
    else:
        num_split = float(num_split)

        # Create output handles
        handles_dict = {'under': getOutputHandle(db_file,
                                                 'under-%.1f' % num_split,
                                                 out_type=out_args['out_type'],
                                                 out_name=out_args['out_name'],
                                                 out_dir=out_args['out_dir']),
                        'atleast': getOutputHandle(db_file,
                                                   'atleast-%.1f' % num_split,
                                                   out_type=out_args['out_type'],
                                                   out_name=out_args['out_name'],
                                                   out_dir=out_args['out_dir'])}

        # Create Db writer instances
        writers_dict = {'under': getDbWriter(handles_dict['under'], db_file),
                        'atleast': getDbWriter(handles_dict['atleast'], db_file)}

        # Iterate over IgRecords
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            tag = row[field]
            tag = 'under' if float(tag) < num_split else 'atleast'
            writers_dict[tag].writerow(row)

    # Write log
    printProgress(count, rec_count, 0.05, start_time)
    log = OrderedDict()
    for i, k in enumerate(handles_dict):
        log['OUTPUT%i' % (i + 1)] = os.path.basename(handles_dict[k].name)
    log['RECORDS'] = rec_count
    log['PARTS'] = len(handles_dict)
    log['END'] = 'ParseDb'
    printLog(log)

    # Close output file handles
    for t in handles_dict:
        handles_dict[t].close()

    return [handles_dict[t].name for t in handles_dict]
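
# Usage sketch (file and field names are hypothetical; the -d input flag comes
# from the common Change-O argument parser): split a database on a textual
# field, or on a numeric threshold.
#   ParseDb.py split -d records.tab -f PRCONS
#   ParseDb.py split -d records.tab -f DUPCOUNT --num 2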
# TODO: SHOULD ALLOW FOR UNSORTED CLUSTER COLUMN
# TODO: SHOULD ALLOW FOR GROUPING FIELDS
def convertDbBaseline(db_file, id_field=default_id_field, seq_field=default_seq_field,
                      germ_field=default_germ_field, cluster_field=None,
                      meta_fields=None, out_args=default_out_args):
    """
    Builds BASELINe fasta files from database records

    Arguments:
      db_file = the database file name
      id_field = the field containing identifiers
      seq_field = the field containing sample sequences
      germ_field = the field containing germline sequences
      cluster_field = the field containing clonal groupings;
                      if None write the germline for each record
      meta_fields = a list of fields to add to sequence annotations
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'baseline'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    log['GERM_FIELD'] = germ_field
    log['CLUSTER_FIELD'] = cluster_field
    if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='clip')
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = germ_count = pass_count = fail_count = 0
    cluster_last = None
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update cluster ID
        cluster = rec.get(cluster_field, None)

        # Get germline SeqRecord when needed
        if cluster_field is None:
            germ = getDbSeqRecord(rec, id_field, germ_field, meta_fields,
                                  delimiter=out_args['delimiter'])
            germ.id = '>' + germ.id
        elif cluster != cluster_last:
            germ = getDbSeqRecord(rec, cluster_field, germ_field,
                                  delimiter=out_args['delimiter'])
            germ.id = '>' + germ.id
        else:
            germ = None

        # Get read SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields,
                             delimiter=out_args['delimiter'])

        # Write germline
        if germ is not None:
            germ_count += 1
            SeqIO.write(germ, pass_handle, 'fasta')

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, 'fasta')
        else:
            fail_count += 1

        # Set last cluster ID
        cluster_last = cluster

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['GERMLINES'] = germ_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
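
# Usage sketch (hypothetical file name): write a BASELINe-formatted (clip)
# fasta file, emitting one germline per clone from a sorted CLONE column.
#   ParseDb.py baseline -d records_clone-pass.tab --cf CLONE --mf V_CALL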
def convertDbFasta(db_file, id_field=default_id_field, seq_field=default_seq_field,
                   meta_fields=None, out_args=default_out_args):
    """
    Builds fasta files from database records

    Arguments:
      db_file = the database file name
      id_field = the field containing identifiers
      seq_field = the field containing sequences
      meta_fields = a list of fields to add to sequence annotations
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'fasta'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    out_type = 'fasta'
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type=out_type)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Get SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields, out_args['delimiter'])

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, out_type)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
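
# Usage sketch (hypothetical file name): export reads to fasta with the
# default identifier and sequence fields, adding DUPCOUNT to the headers.
#   ParseDb.py fasta -d records.tab --mf DUPCOUNT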
def addDbFile(db_file, fields, values, out_args=default_out_args):
    """
    Adds field and value pairs to a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to add
      values = a list of values to assign to all rows of each field
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'add'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-add', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Define fields and values to append
    add_dict = {k: v for k, v in zip(fields, values) if k not in db_iter.fieldnames}

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write updated row
        rec.update(add_dict)
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
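
# Usage sketch: add constant-value columns, one value (-u) per new field (-f).
#   ParseDb.py add -d records.tab -f SAMPLE TISSUE -u S01 PBMC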
def indexDbFile(db_file, field=default_index_field, out_args=default_out_args):
    """
    Adds an index column to a database file

    Arguments:
      db_file = the database file name
      field = the name of the index field to add
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'index'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-index', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=field)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Add count and write updated row
        rec.update({field: rec_count})
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
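
# Usage sketch: number rows in a new INDEX column (the default -f value).
#   ParseDb.py index -d records.tab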
def dropDbFile(db_file, fields, out_args=default_out_args):
    """
    Deletes entire fields from a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to drop
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'drop'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-drop', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, exclude_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write row
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
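
# Usage sketch: remove columns entirely from the database.
#   ParseDb.py drop -d records.tab -f SEQUENCE_INPUT SEQUENCE_VDJ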
def deleteDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Deletes records from a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to check for deletion criteria
      values = a list of values defining deletion targets
      logic = one of 'any' or 'all' defining whether one or all fields must have a match
      regex = if False do exact full string matches; if True allow partial regex matches
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns): return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns): return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'delete'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-delete', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for deletion values in all fields
        delete = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if not delete:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['KEPT'] = pass_count
    log['DELETED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
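
# Usage sketch: drop rows whose FUNCTIONAL field is 'F'; with the default
# --logic any, a match in any listed field deletes the row.
#   ParseDb.py delete -d records.tab -f FUNCTIONAL -u F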
def renameDbFile(db_file, fields, names, out_args=default_out_args):
    """
    Renames fields in a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to rename
      names = a list of new names for the fields
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'rename'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['NAMES'] = ','.join(names)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-rename', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')

    # Get header and rename fields
    header = readDbFile(db_file, ig=False).fieldnames
    for f, n in zip(fields, names):
        i = header.index(f)
        header[i] = n

    # Open writer and write new header
    # TODO: should modify getDbWriter to take a list of fields
    pass_writer = csv.DictWriter(pass_handle, fieldnames=header, dialect='excel-tab')
    pass_writer.writeheader()

    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # TODO: repeated renaming is unnecessary; should add a non-dict reader/writer to DbCore
        # Rename fields
        for f, n in zip(fields, names):
            rec[n] = rec.pop(f)
        # Write
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
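
# Usage sketch: rename columns, one new name (-k) per field (-f).
#   ParseDb.py rename -d records.tab -f V_CALL_GENOTYPED -k V_CALL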
def selectDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Selects records from a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to check for selection criteria
      values = a list of values defining selection targets
      logic = one of 'any' or 'all' defining whether one or all fields must have a match
      regex = if False do exact full string matches; if True allow partial regex matches
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns): return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns): return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    # Print console log
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'select'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    log['REGEX'] = regex
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-select', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for selection values in all fields
        select = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if select:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['SELECTED'] = pass_count
    log['DISCARDED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
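
# Usage sketch: keep rows whose V_CALL matches IGHV4, using regular
# expression (partial) matching.
#   ParseDb.py select -d records.tab -f V_CALL -u IGHV4 --regex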
def sortDbFile(db_file, field, numeric=False, descend=False,
               out_args=default_out_args):
    """
    Sorts records by values in an annotation field

    Arguments:
      db_file = the database filename
      field = the field name to sort by
      numeric = if True sort field numerically;
                if False sort field alphabetically
      descend = if True sort in descending order;
                if False sort in ascending order
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'sort'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUMERIC'] = numeric
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-sort', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)

    # Store all records in a dictionary
    start_time = time()
    printMessage("Indexing: Running", start_time=start_time)
    db_dict = {i: r for i, r in enumerate(db_iter)}
    result_count = len(db_dict)

    # Sort db_dict by field values
    tag_dict = {k: v[field] for k, v in db_dict.items()}
    if numeric: tag_dict = {k: float(v or 0) for k, v in tag_dict.items()}
    sorted_keys = sorted(tag_dict, key=tag_dict.get, reverse=descend)
    printMessage("Indexing: Done", start_time=start_time, end=True)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for key in sorted_keys:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Write records
        pass_writer.writerow(db_dict[key])

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
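
# Usage sketch: sort rows by duplicate count, highest first. All records are
# held in memory while sorting, so this assumes the file fits in RAM.
#   ParseDb.py sort -d records.tab -f DUPCOUNT --num --descend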
def updateDbFile(db_file, field, values, updates, out_args=default_out_args):
    """
    Updates values in a database file field

    Arguments:
      db_file = the database file name
      field = the field to update
      values = a list of values specifying which rows to update
      updates = a list of values to update each value with
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'update'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['VALUES'] = ','.join(values)
    log['UPDATES'] = ','.join(updates)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-update', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update values if found
        for x, y in zip(values, updates):
            if rec[field] == x:
                rec[field] = y
                pass_count += 1

        # Write records
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['UPDATED'] = pass_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
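
# Usage sketch: rewrite values in one column; each value (-u) is replaced by
# the corresponding update (-t) on an exact match.
#   ParseDb.py update -d records.tab -f SAMPLE -u S01 S02 -t day0 day7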
def getArgParser():
    """
    Defines the ArgumentParser

    Arguments:
      None

    Returns:
      an ArgumentParser object
    """
    # Define input and output field help message
    fields = dedent(
             '''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and baseline.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 SEQUENCE_ID

             optional fields:
                 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
                 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
                 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION

             output fields:
                 None
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Database operation')
    # TODO: This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Define parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       failed=False, log=False)

    # Subparser to convert database entries to sequence file
    parser_seq = subparsers.add_parser('fasta', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Creates a fasta file from database records.',
                                       description='Creates a fasta file from database records.')
    parser_seq.add_argument('--if', action='store', dest='id_field',
                            default=default_id_field,
                            help='The name of the field containing identifiers')
    parser_seq.add_argument('--sf', action='store', dest='seq_field',
                            default=default_seq_field,
                            help='The name of the field containing sequences')
    parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                            help='List of annotation fields to add to the sequence description')
    parser_seq.set_defaults(func=convertDbFasta)

    # Subparser to convert database entries to clip-fasta file
    parser_baseln = subparsers.add_parser('baseline', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          description='Creates a BASELINe fasta file from database records.',
                                          help='''Creates a specially formatted fasta file
                                               from database records for input into the BASELINe
                                               website. The format groups clonally related sequences
                                               sequentially, with the germline sequence preceding
                                               each clone and denoted by headers starting with ">>".''')
    parser_baseln.add_argument('--if', action='store', dest='id_field',
                               default=default_id_field,
                               help='The name of the field containing identifiers')
    parser_baseln.add_argument('--sf', action='store', dest='seq_field',
                               default=default_seq_field,
                               help='The name of the field containing reads')
    parser_baseln.add_argument('--gf', action='store', dest='germ_field',
                               default=default_germ_field,
                               help='The name of the field containing germline sequences')
    parser_baseln.add_argument('--cf', action='store', dest='cluster_field', default=None,
                               help='The name of the field containing sorted clone IDs')
    parser_baseln.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                               help='List of annotation fields to add to the sequence description')
    parser_baseln.set_defaults(func=convertDbBaseline)

    # Subparser to partition files by annotation values
    parser_split = subparsers.add_parser('split', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Splits database files by field values.',
                                         description='Splits database files by field values.')
    parser_split.add_argument('-f', action='store', dest='field', type=str, required=True,
                              help='Annotation field by which to split database files.')
    parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None,
                              help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.''')
    parser_split.set_defaults(func=splitDbFile)

    # Subparser to add fields
    parser_add = subparsers.add_parser('add', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Adds field and value pairs.',
                                       description='Adds field and value pairs.')
    parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                            help='The name of the fields to add.')
    parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                            help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser('delete', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Deletes specific records.',
                                          description='Deletes specific records.')
    parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for deletion criteria.')
    parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'],
                               help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.''')
    parser_delete.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_delete.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Deletes entire fields.',
                                        description='Deletes entire fields.')
    parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                             help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to index fields
    parser_index = subparsers.add_parser('index', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Adds a numeric index field.',
                                         description='Adds a numeric index field.')
    parser_index.add_argument('-f', action='store', dest='field',
                              default=default_index_field,
                              help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Renames fields.',
                                          description='Renames fields.')
    parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='List of fields to rename.')
    parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True,
                               help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records
    parser_select = subparsers.add_parser('select', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Selects specific records.',
                                          description='Selects specific records.')
    parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for selection criteria.')
    parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.''')
    parser_select.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_select.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort records
    parser_sort = subparsers.add_parser('sort', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Sorts records by field values.',
                                        description='Sorts records by field values.')
    parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True,
                             help='The annotation field by which to sort records.')
    parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False,
                             help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    parser_sort.add_argument('--descend', action='store_true', dest='descend',
                             help='''If specified, sort records in descending, rather
                                  than ascending, order by values in the target field.''')
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update records
    parser_update = subparsers.add_parser('update', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Updates field and value pairs.',
                                          description='Updates field and value pairs.')
    parser_update.add_argument('-f', action='store', dest='field', required=True,
                               help='The name of the field to update.')
    parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='The values that will be replaced.')
    parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True,
                               help='The new value to assign to each selected row.')
    parser_update.set_defaults(func=updateDbFile)

    return parser
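
# Sketch (hypothetical arguments): the parser can also be driven
# programmatically, mirroring the dispatch in the __main__ block below.
#   parser = getArgParser()
#   args = parser.parse_args(['index', '-d', 'records.tab'])
#   print(args.func)  # <function indexDbFile ...>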
if __name__ == '__main__':
    """
    Parses command line arguments and calls main function
    """
    # Parse arguments
    parser = getArgParser()
    checkArgs(parser)
    args = parser.parse_args()
    args_dict = parseCommonArgs(args)
    # Convert case of fields
    if 'id_field' in args_dict:
        args_dict['id_field'] = args_dict['id_field'].upper()
    if 'seq_field' in args_dict:
        args_dict['seq_field'] = args_dict['seq_field'].upper()
    if 'germ_field' in args_dict:
        args_dict['germ_field'] = args_dict['germ_field'].upper()
    if 'field' in args_dict:
        args_dict['field'] = args_dict['field'].upper()
    if 'cluster_field' in args_dict and args_dict['cluster_field'] is not None:
        args_dict['cluster_field'] = args_dict['cluster_field'].upper()
    if 'meta_fields' in args_dict and args_dict['meta_fields'] is not None:
        args_dict['meta_fields'] = [f.upper() for f in args_dict['meta_fields']]
    if 'fields' in args_dict:
        args_dict['fields'] = [f.upper() for f in args_dict['fields']]

    # Check modify_args arguments
    if args.command == 'add' and len(args_dict['fields']) != len(args_dict['values']):
        parser.error('You must specify exactly one value (-u) per field (-f)')
    elif args.command == 'rename' and len(args_dict['fields']) != len(args_dict['names']):
        parser.error('You must specify exactly one new name (-k) per field (-f)')
    elif args.command == 'update' and len(args_dict['values']) != len(args_dict['updates']):
        parser.error('You must specify exactly one value (-u) per replacement (-t)')

    # Call parser function for each database file
    del args_dict['command']
    del args_dict['func']
    del args_dict['db_files']
    for f in args.__dict__['db_files']:
        args_dict['db_file'] = f
        args.func(**args_dict)
