comparison: search_model_validation.py @ 41:6546d7c9f08b (draft)

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb

author:   bgruening
date:     Wed, 09 Aug 2023 12:52:25 +0000
parents:  4ecc0ce9d0a2
children: (none)
compares: 40:a07ab242b0b5 -> 41:6546d7c9f08b
--- a/search_model_validation.py
+++ b/search_model_validation.py
@@ -1,37 +1,57 @@
 import argparse
-import collections
 import json
 import os
-import pickle
 import sys
 import warnings
+from distutils.version import LooseVersion as Version
 
 import imblearn
 import joblib
 import numpy as np
 import pandas as pd
 import skrebate
-from galaxy_ml.utils import (clean_params, get_cv,
-                             get_main_estimator, get_module, get_scoring,
-                             load_model, read_columns, SafeEval, try_get_attr)
+from galaxy_ml import __version__ as galaxy_ml_version
+from galaxy_ml.binarize_target import IRAPSClassifier
+from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5
+from galaxy_ml.utils import (
+    clean_params,
+    get_cv,
+    get_main_estimator,
+    get_module,
+    get_scoring,
+    read_columns,
+    SafeEval,
+    try_get_attr
+)
 from scipy.io import mmread
-from sklearn import (cluster, decomposition, feature_selection,
-                     kernel_approximation, model_selection, preprocessing)
+from sklearn import (
+    cluster,
+    decomposition,
+    feature_selection,
+    kernel_approximation,
+    model_selection,
+    preprocessing,
+)
 from sklearn.exceptions import FitFailedWarning
 from sklearn.model_selection import _search, _validation
 from sklearn.model_selection._validation import _score, cross_validate
+from sklearn.preprocessing import LabelEncoder
+from skopt import BayesSearchCV
 
-_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
-setattr(_search, "_fit_and_score", _fit_and_score)
-setattr(_validation, "_fit_and_score", _fit_and_score)
-
 N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
 # handle disk cache
 CACHE_DIR = os.path.join(os.getcwd(), "cached")
-del os
-NON_SEARCHABLE = ("n_jobs", "pre_dispatch", "memory", "_path", "nthread", "callbacks")
+NON_SEARCHABLE = (
+    "n_jobs",
+    "pre_dispatch",
+    "memory",
+    "_path",
+    "_dir",
+    "nthread",
+    "callbacks",
+)
 
 
 def _eval_search_params(params_builder):
     search_params = {}
 
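The import block swaps pickle-based persistence for galaxy_ml's HDF5 helpers and starts pinning behavior to the installed galaxy_ml version via LooseVersion. A minimal sketch of how that version gate behaves (standard library only; note distutils is deprecated since Python 3.10, where packaging.version is the usual replacement):

    from distutils.version import LooseVersion as Version

    # Components are compared numerically, so "0.10" correctly sorts above
    # "0.8" even though it would compare smaller as a plain string.
    print(Version("0.8.2") < Version("0.8.3"))   # True
    print(Version("0.10.0") < Version("0.8.3"))  # False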
@@ -98,37 +118,33 @@
                 skrebate.MultiSURFstar(n_jobs=N_JOBS),
                 imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
                 imblearn.under_sampling.CondensedNearestNeighbour(
                     random_state=0, n_jobs=N_JOBS
                 ),
-                imblearn.under_sampling.EditedNearestNeighbours(
-                    random_state=0, n_jobs=N_JOBS
-                ),
-                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
-                    random_state=0, n_jobs=N_JOBS
-                ),
-                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.EditedNearestNeighbours(n_jobs=N_JOBS),
+                imblearn.under_sampling.RepeatedEditedNearestNeighbours(n_jobs=N_JOBS),
+                imblearn.under_sampling.AllKNN(n_jobs=N_JOBS),
                 imblearn.under_sampling.InstanceHardnessThreshold(
                     random_state=0, n_jobs=N_JOBS
                 ),
-                imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.NeighbourhoodCleaningRule(
-                    random_state=0, n_jobs=N_JOBS
-                ),
+                imblearn.under_sampling.NearMiss(n_jobs=N_JOBS),
+                imblearn.under_sampling.NeighbourhoodCleaningRule(n_jobs=N_JOBS),
                 imblearn.under_sampling.OneSidedSelection(
                     random_state=0, n_jobs=N_JOBS
                 ),
                 imblearn.under_sampling.RandomUnderSampler(random_state=0),
-                imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.TomekLinks(n_jobs=N_JOBS),
                 imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.KMeansSMOTE(random_state=0, n_jobs=N_JOBS),
                 imblearn.over_sampling.RandomOverSampler(random_state=0),
                 imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
-                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
-                imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.SMOTEN(random_state=0, n_jobs=N_JOBS),
                 imblearn.over_sampling.SMOTENC(
                     categorical_features=[], random_state=0, n_jobs=N_JOBS
                 ),
+                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                 imblearn.combine.SMOTEENN(random_state=0),
                 imblearn.combine.SMOTETomek(random_state=0),
             )
             newlist = []
             for obj in ev:
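This hunk tracks the imbalanced-learn API: the deterministic cleaning rules (EditedNearestNeighbours, RepeatedEditedNearestNeighbours, AllKNN, NearMiss, NeighbourhoodCleaningRule, TomekLinks) no longer accept random_state, and SMOTEN and KMeansSMOTE join the over-samplers. A hedged sketch of the updated call signature, assuming a reasonably recent imbalanced-learn is installed:

    from collections import Counter

    from imblearn.under_sampling import EditedNearestNeighbours
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=200, weights=[0.9, 0.1], random_state=0)
    # No random_state argument: the cleaning rule is deterministic.
    X_res, y_res = EditedNearestNeighbours(n_jobs=1).fit_resample(X, y)
    print(Counter(y), "->", Counter(y_res))  # majority class thinned out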
@@ -286,11 +302,16 @@
     else:
         infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
     loaded_df[df_key] = infile2
 
     y = read_columns(
-        infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True
+        infile2,
+        c=c,
+        c_option=column_option,
+        sep="\t",
+        header=header,
+        parse_dates=True,
     )
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
     if input_type == "refseq_and_interval":
         estimator.set_params(data_batch_generator__features=y.ravel().tolist())
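Only the call layout changes here; the surrounding context still squeezes a single-column target to 1-D before fitting. The same check in isolation:

    import numpy as np

    y = np.array([[0], [1], [1]])  # (n, 1) column as read from a table
    if len(y.shape) == 2 and y.shape[1] == 1:
        y = y.ravel()
    print(y.shape)  # (3,)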
@@ -414,59 +435,81 @@
             pass
     for warning in w:
         print(repr(warning.message))
 
     scorer_ = searcher.scorer_
-    if isinstance(scorer_, collections.Mapping):
-        is_multimetric = True
-    else:
-        is_multimetric = False
 
     best_estimator_ = getattr(searcher, "best_estimator_")
 
     # TODO Solve deep learning models in pipeline
     if best_estimator_.__class__.__name__ == "KerasGBatchClassifier":
         test_score = best_estimator_.evaluate(
-            X_test, scorer=scorer_, is_multimetric=is_multimetric
+            X_test,
+            scorer=scorer_,
         )
     else:
-        test_score = _score(
-            best_estimator_, X_test, y_test, scorer_, is_multimetric=is_multimetric
-        )
+        test_score = _score(best_estimator_, X_test, y_test, scorer_)
 
-    if not is_multimetric:
+    if not isinstance(scorer_, dict):
         test_score = {primary_scoring: test_score}
     for key, value in test_score.items():
         test_score[key] = [value]
     result_df = pd.DataFrame(test_score)
     result_df.to_csv(path_or_buf=outfile, sep="\t", header=True, index=False)
 
     return searcher
 
 
+def _set_memory(estimator, memory):
+    """set memory cache
+
+    Parameters
+    ----------
+    estimator : python object
+    memory : joblib.Memory object
+
+    Returns
+    -------
+    estimator : estimator object after setting new attributes
+    """
+    if isinstance(estimator, IRAPSClassifier):
+        estimator.set_params(memory=memory)
+        return estimator
+
+    estimator_params = estimator.get_params()
+
+    new_params = {}
+    for k in estimator_params.keys():
+        if k.endswith("irapsclassifier__memory"):
+            new_params[k] = memory
+
+    estimator.set_params(**new_params)
+
+    return estimator
+
+
 def main(
     inputs,
     infile_estimator,
     infile1,
     infile2,
     outfile_result,
     outfile_object=None,
-    outfile_weights=None,
     groups=None,
     ref_seq=None,
     intervals=None,
     targets=None,
     fasta_path=None,
 ):
     """
     Parameter
     ---------
     inputs : str
-        File path to galaxy tool parameter
+        File path to galaxy tool parameter.
 
     infile_estimator : str
-        File path to estimator
+        File path to estimator.
 
     infile1 : str
         File path to dataset containing features
 
     infile2 : str
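The new _set_memory helper replaces the old inline IRAPSClassifier special case: it either sets memory directly on a bare IRAPSClassifier or scans get_params() for nested "...irapsclassifier__memory" keys, which reaches an IRAPSClassifier buried at any pipeline depth. A sketch of the suffix-matching idea with a plain sklearn Pipeline and a hypothetical step name (IRAPSClassifier itself lives in galaxy_ml):

    import joblib
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    pipe = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())])
    memory = joblib.Memory(location="./cached", verbose=0)

    # get_params() flattens nested parameters into "<step>__<param>" keys,
    # so an endswith() match finds the target step at any nesting level.
    new_params = {k: memory for k in pipe.get_params() if k.endswith("clf__memory")}
    pipe.set_params(**new_params)  # empty here: LogisticRegression has no memory param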
@@ -475,13 +518,10 @@
     outfile_result : str
         File path to save the results, either cv_results or test result
 
     outfile_object : str, optional
         File path to save searchCV object
 
-    outfile_weights : str, optional
-        File path to save model weights
-
     groups : str
         File path to dataset containing groups labels
 
     ref_seq : str
@@ -503,22 +543,42 @@
 
     with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     # Override the refit parameter
-    params["search_schemes"]["options"]["refit"] = (
-        True if params["save"] != "nope" else False
+    params["options"]["refit"] = (
+        True
+        if (
+            params["save"] != "nope"
+            or params["outer_split"]["split_mode"] == "nested_cv"
+        )
+        else False
     )
 
-    with open(infile_estimator, "rb") as estimator_handler:
-        estimator = load_model(estimator_handler)
+    estimator = load_model_from_h5(infile_estimator)
 
-    optimizer = params["search_schemes"]["selected_search_scheme"]
-    optimizer = getattr(model_selection, optimizer)
+    estimator = clean_params(estimator)
+
+    if estimator.__class__.__name__ == "KerasGBatchClassifier":
+        _fit_and_score = try_get_attr(
+            "galaxy_ml.model_validations",
+            "_fit_and_score",
+        )
+
+        setattr(_search, "_fit_and_score", _fit_and_score)
+        setattr(_validation, "_fit_and_score", _fit_and_score)
+
+    search_algos_and_options = params["search_algos"]
+    optimizer = search_algos_and_options.pop("selected_search_algo")
+    if optimizer == "skopt.BayesSearchCV":
+        optimizer = BayesSearchCV
+    else:
+        optimizer = getattr(model_selection, optimizer)
 
     # handle gridsearchcv options
-    options = params["search_schemes"]["options"]
+    options = params["options"]
+    options.update(search_algos_and_options)
 
     if groups:
         header = (
             "infer" if (options["cv_selector"]["groups_selector"]["header_g"]) else None
         )
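Search-algorithm selection moves out of the flat search_schemes section: the chosen name is popped from params["search_algos"] and resolved either to skopt's BayesSearchCV or to a class looked up on sklearn.model_selection, with the remaining keys merged into the common options. A reduced sketch of that dispatch (the resolve() helper is illustrative, not part of the tool):

    from sklearn import model_selection
    from skopt import BayesSearchCV

    def resolve(name):
        if name == "skopt.BayesSearchCV":
            return BayesSearchCV
        return getattr(model_selection, name)

    print(resolve("GridSearchCV"))         # sklearn's grid search class
    print(resolve("skopt.BayesSearchCV"))  # scikit-optimize's searcher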
@@ -551,42 +611,40 @@
             parse_dates=True,
         )
         groups = groups.ravel()
         options["cv_selector"]["groups_selector"] = groups
 
-    splitter, groups = get_cv(options.pop("cv_selector"))
+    cv_selector = options.pop("cv_selector")
+    if Version(galaxy_ml_version) < Version("0.8.3"):
+        cv_selector.pop("n_stratification_bins", None)
+    splitter, groups = get_cv(cv_selector)
     options["cv"] = splitter
     primary_scoring = options["scoring"]["primary_scoring"]
-    # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
-    # Check if secondary_scoring is specified
-    secondary_scoring = options["scoring"].get("secondary_scoring", None)
-    if secondary_scoring is not None:
-        # If secondary_scoring is specified, convert the list into comman separated string
-        options["scoring"]["secondary_scoring"] = ",".join(
-            options["scoring"]["secondary_scoring"]
-        )
     options["scoring"] = get_scoring(options["scoring"])
+    # TODO make BayesSearchCV support multiple scoring
+    if optimizer == "skopt.BayesSearchCV" and isinstance(options["scoring"], dict):
+        options["scoring"] = options["scoring"][primary_scoring]
+        warnings.warn(
+            "BayesSearchCV doesn't support multiple "
+            "scorings! Primary scoring is used."
+        )
     if options["error_score"]:
         options["error_score"] = "raise"
     else:
-        options["error_score"] = np.nan
+        options["error_score"] = np.NaN
     if options["refit"] and isinstance(options["scoring"], dict):
         options["refit"] = primary_scoring
     if "pre_dispatch" in options and options["pre_dispatch"] == "":
         options["pre_dispatch"] = None
 
-    params_builder = params["search_schemes"]["search_params_builder"]
+    params_builder = params["search_params_builder"]
     param_grid = _eval_search_params(params_builder)
-
-    estimator = clean_params(estimator)
 
     # save the SearchCV object without fit
     if params["save"] == "save_no_fit":
         searcher = optimizer(estimator, param_grid, **options)
-        print(searcher)
-        with open(outfile_object, "wb") as output_handler:
-            pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL)
+        dump_model_to_h5(searcher, outfile_object)
         return 0
 
     # read inputs and loads new attributes, like paths
     estimator, X, y = _handle_X_y(
         estimator,
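The secondary-scoring string munging is gone (get_scoring() now takes the whole scoring dict), and a new guard downgrades multi-metric scoring to the primary scorer for BayesSearchCV, which only accepts a single scorer. One caveat visible in the hunk: optimizer has already been resolved to a class at this point, so the string comparison `optimizer == "skopt.BayesSearchCV"` looks like it can never be true. A minimal single-scorer BayesSearchCV run, assuming scikit-optimize is installed:

    from skopt import BayesSearchCV
    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    opt = BayesSearchCV(
        SVC(),
        {"C": (1e-3, 1e3, "log-uniform")},  # skopt search-space shorthand
        n_iter=8,
        scoring="accuracy",  # a single scorer, never a dict
        cv=3,
        random_state=0,
    )
    opt.fit(X, y)
    print(opt.best_score_)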
@@ -598,41 +656,40 @@
         intervals=intervals,
         targets=targets,
         fasta_path=fasta_path,
     )
 
+    label_encoder = LabelEncoder()
+    if get_main_estimator(estimator).__class__.__name__ == "XGBClassifier":
+        y = label_encoder.fit_transform(y)
+
     # cache iraps_core fits could increase search speed significantly
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
-    main_est = get_main_estimator(estimator)
-    if main_est.__class__.__name__ == "IRAPSClassifier":
-        main_est.set_params(memory=memory)
+    estimator = _set_memory(estimator, memory)
 
     searcher = optimizer(estimator, param_grid, **options)
 
     split_mode = params["outer_split"].pop("split_mode")
 
+    # Nested CV
     if split_mode == "nested_cv":
-        # make sure refit is choosen
-        # this could be True for sklearn models, but not the case for
-        # deep learning models
-        if not options["refit"] and not all(
-            hasattr(estimator, attr) for attr in ("config", "model_type")
-        ):
-            warnings.warn("Refit is change to `True` for nested validation!")
-            setattr(searcher, "refit", True)
-
-        outer_cv, _ = get_cv(params["outer_split"]["cv_selector"])
+        cv_selector = params["outer_split"]["cv_selector"]
+        if Version(galaxy_ml_version) < Version("0.8.3"):
+            cv_selector.pop("n_stratification_bins", None)
+        outer_cv, _ = get_cv(cv_selector)
         # nested CV, outer cv using cross_validate
         if options["error_score"] == "raise":
             rval = cross_validate(
                 searcher,
                 X,
                 y,
+                groups=groups,
                 scoring=options["scoring"],
                 cv=outer_cv,
                 n_jobs=N_JOBS,
                 verbose=options["verbose"],
+                fit_params={"groups": groups},
                 return_estimator=(params["save"] == "save_estimator"),
                 error_score=options["error_score"],
                 return_train_score=True,
             )
         else:
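New here: labels are re-encoded when the main estimator is an XGBClassifier, since recent xgboost versions require classification targets to be consecutive integers starting at 0. The encoding step in isolation:

    from sklearn.preprocessing import LabelEncoder

    le = LabelEncoder()
    print(le.fit_transform(["tumor", "normal", "tumor"]))  # [1 0 1]
    print(le.classes_)  # ['normal' 'tumor']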
@@ -641,14 +698,16 @@
             try:
                 rval = cross_validate(
                     searcher,
                     X,
                     y,
+                    groups=groups,
                     scoring=options["scoring"],
                     cv=outer_cv,
                     n_jobs=N_JOBS,
                     verbose=options["verbose"],
+                    fit_params={"groups": groups},
                     return_estimator=(params["save"] == "save_estimator"),
                     error_score=options["error_score"],
                     return_train_score=True,
                 )
             except ValueError:
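Both cross_validate calls now forward the group labels twice: `groups=` drives the outer splitter, and `fit_params={"groups": groups}` hands them down to the inner searcher's fit. A single-level analogue showing the `groups=` half with GroupKFold:

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GroupKFold, cross_validate

    rng = np.random.RandomState(0)
    X = rng.randn(12, 3)
    y = np.array([0, 1] * 6)
    groups = np.repeat([0, 1, 2], 4)  # e.g. three donors, four samples each

    res = cross_validate(
        LogisticRegression(), X, y,
        groups=groups, cv=GroupKFold(n_splits=3),
    )
    print(res["test_score"])  # one score per held-out group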
@@ -674,12 +733,10 @@
             cv_results_ = pd.DataFrame(cv_results_)
             cv_results_ = cv_results_[sorted(cv_results_.columns)]
             cv_results_.to_csv(target_path, sep="\t", header=True, index=False)
         except Exception as e:
             print(e)
-        finally:
-            del os
 
         keys = list(rval.keys())
         for k in keys:
             if k.startswith("test"):
                 rval["mean_" + k] = np.mean(rval[k])
@@ -687,18 +744,20 @@
             if k.endswith("time"):
                 rval.pop(k)
         rval = pd.DataFrame(rval)
         rval = rval[sorted(rval.columns)]
         rval.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
+
+        return 0
+
         # deprecate train test split mode
         """searcher = _do_train_test_split_val(
             searcher, X, y, params,
             primary_scoring=primary_scoring,
             error_score=options['error_score'],
             groups=groups,
             outfile=outfile_result)"""
-        return 0
 
     # no outer split
     else:
         searcher.set_params(n_jobs=N_JOBS)
         if options["error_score"] == "raise":
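The nested-CV branch now returns before the commented-out train/test-split block instead of after it, and the report logic is unchanged: per-split arrays from cross_validate gain a mean_* column per test metric while timing entries are dropped. A reduced illustration of that reshaping:

    import numpy as np
    import pandas as pd

    rval = {
        "fit_time": np.array([0.1, 0.2]),
        "score_time": np.array([0.01, 0.02]),
        "test_score": np.array([0.8, 0.9]),
        "train_score": np.array([0.95, 0.97]),
    }
    for k in list(rval):  # list() so the dict can be mutated while iterating
        if k.startswith("test"):
            rval["mean_" + k] = np.mean(rval[k])
        if k.endswith("time"):
            rval.pop(k)
    print(pd.DataFrame(rval)[sorted(rval)])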
@@ -730,55 +789,24 @@
                 "'best_estimator_', because either it's "
                 "nested gridsearch or `refit` is False!"
             )
             return
 
-        # clean prams
-        best_estimator_ = clean_params(best_estimator_)
-
-        main_est = get_main_estimator(best_estimator_)
-
-        if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
-            if outfile_weights:
-                main_est.save_weights(outfile_weights)
-            del main_est.model_
-            del main_est.fit_params
-            del main_est.model_class_
-            del main_est.validation_data
-            if getattr(main_est, "data_generator_", None):
-                del main_est.data_generator_
-
-        with open(outfile_object, "wb") as output_handler:
-            print("Best estimator is saved: %s " % repr(best_estimator_))
-            pickle.dump(best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)
+        dump_model_to_h5(best_estimator_, outfile_object)
 
 
 if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
     aparser.add_argument("-X", "--infile1", dest="infile1")
     aparser.add_argument("-y", "--infile2", dest="infile2")
     aparser.add_argument("-O", "--outfile_result", dest="outfile_result")
     aparser.add_argument("-o", "--outfile_object", dest="outfile_object")
-    aparser.add_argument("-w", "--outfile_weights", dest="outfile_weights")
     aparser.add_argument("-g", "--groups", dest="groups")
     aparser.add_argument("-r", "--ref_seq", dest="ref_seq")
     aparser.add_argument("-b", "--intervals", dest="intervals")
     aparser.add_argument("-t", "--targets", dest="targets")
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     args = aparser.parse_args()
 
-    main(
-        args.inputs,
-        args.infile_estimator,
-        args.infile1,
-        args.infile2,
-        args.outfile_result,
-        outfile_object=args.outfile_object,
-        outfile_weights=args.outfile_weights,
-        groups=args.groups,
-        ref_seq=args.ref_seq,
-        intervals=args.intervals,
-        targets=args.targets,
-        fasta_path=args.fasta_path,
-    )
+    main(**vars(args))
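The entry point collapses to main(**vars(args)), which works because every remaining argparse dest matches a parameter of main() by name. The pattern in miniature:

    import argparse

    def main(inputs, infile_estimator=None):
        print(inputs, infile_estimator)

    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-e", "--estimator", dest="infile_estimator")
    args = aparser.parse_args(["-i", "params.json", "-e", "model.h5"])

    # vars() turns the Namespace into a dict, unpacked as keyword arguments.
    main(**vars(args))  # == main(inputs="params.json", infile_estimator="model.h5")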