# HG changeset patch
# User bgruening
# Date 1618336335 0
# Node ID 1e99cfb71f4057db6ec3e80edc0173d22be40d26
# Parent  7068b5fcd623db8958749d228b6eb9261e86982e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
diff -r 7068b5fcd623 -r 1e99cfb71f40 fitted_model_eval.py
--- a/fitted_model_eval.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/fitted_model_eval.py	Tue Apr 13 17:52:15 2021 +0000
@@ -11,7 +11,7 @@
 
 
 def _get_X_y(params, infile1, infile2):
-    """ read from inputs and output X and y
+    """read from inputs and output X and y
 
     Parameters
     ----------
@@ -26,35 +26,40 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -62,26 +67,24 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True)
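+    # flatten a single-column target to a 1-D array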
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
 
     return X, y
 
 
-def main(inputs, infile_estimator, outfile_eval,
-         infile_weights=None, infile1=None,
-         infile2=None):
+def main(
+    inputs,
+    infile_estimator,
+    outfile_eval,
+    infile_weights=None,
+    infile1=None,
+    infile2=None,
+):
     """
     Parameters
     ----------
@@ -103,49 +106,55 @@
     infile2 : str
         File path to dataset containing target values
     """
-    warnings.filterwarnings('ignore')
+    warnings.filterwarnings("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     X_test, y_test = _get_X_y(params, infile1, infile2)
 
     # load model
-    with open(infile_estimator, 'rb') as est_handler:
+    with open(infile_estimator, "rb") as est_handler:
         estimator = load_model(est_handler)
 
     main_est = estimator
     if isinstance(estimator, Pipeline):
         main_est = estimator.steps[-1][-1]
-    if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'):
-        if not infile_weights or infile_weights == 'None':
-            raise ValueError("The selected model skeleton asks for weights, "
-                             "but no dataset for weights was provided!")
+    if hasattr(main_est, "config") and hasattr(main_est, "load_weights"):
+        if not infile_weights or infile_weights == "None":
+            raise ValueError(
+                "The selected model skeleton asks for weights, " "but no dataset for weights was provided!"
+            )
         main_est.load_weights(infile_weights)
 
     # handle scorer, convert to scorer dict
-    scoring = params['scoring']
+    # Check if scoring is specified
+    scoring = params["scoring"]
+    if scoring is not None:
+        # get_scoring() expects secondary_scoring to be a comma-separated string (not a list)
+        # Check if secondary_scoring is specified
+        secondary_scoring = scoring.get("secondary_scoring", None)
+        if secondary_scoring is not None:
+            # If secondary_scoring is specified, convert the list into a comma-separated string
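+            # e.g. ["precision", "recall"] becomes "precision,recall"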
+            scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
+
     scorer = get_scoring(scoring)
     scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
 
-    if hasattr(estimator, 'evaluate'):
-        scores = estimator.evaluate(X_test, y_test=y_test,
-                                    scorer=scorer,
-                                    is_multimetric=True)
+    if hasattr(estimator, "evaluate"):
+        scores = estimator.evaluate(X_test, y_test=y_test, scorer=scorer, is_multimetric=True)
     else:
-        scores = _score(estimator, X_test, y_test, scorer,
-                        is_multimetric=True)
+        scores = _score(estimator, X_test, y_test, scorer, is_multimetric=True)
 
     # handle output
     for name, score in scores.items():
         scores[name] = [score]
     df = pd.DataFrame(scores)
     df = df[sorted(df.columns)]
-    df.to_csv(path_or_buf=outfile_eval, sep='\t',
-              header=True, index=False)
+    df.to_csv(path_or_buf=outfile_eval, sep="\t", header=True, index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator")
@@ -155,6 +164,11 @@
     aparser.add_argument("-O", "--outfile_eval", dest="outfile_eval")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.outfile_eval,
-         infile_weights=args.infile_weights, infile1=args.infile1,
-         infile2=args.infile2)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.outfile_eval,
+        infile_weights=args.infile_weights,
+        infile1=args.infile1,
+        infile2=args.infile2,
+    )
diff -r 7068b5fcd623 -r 1e99cfb71f40 keras_deep_learning.py
--- a/keras_deep_learning.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/keras_deep_learning.py	Tue Apr 13 17:52:15 2021 +0000
@@ -177,11 +177,11 @@
         # merge layers
         if 'merging_layers' in options:
             idxs = literal_eval(options.pop('merging_layers'))
-            merging_layers = [all_layers[i-1] for i in idxs]
+            merging_layers = [all_layers[i - 1] for i in idxs]
             new_layer = klass(**options)(merging_layers)
         # non-input layers
         elif inbound_nodes is not None:
-            new_layer = klass(**options)(all_layers[inbound_nodes-1])
+            new_layer = klass(**options)(all_layers[inbound_nodes - 1])
         # input layers
         else:
             new_layer = klass(**options)
@@ -189,10 +189,10 @@
         all_layers.append(new_layer)
 
     input_indexes = _handle_shape(config['input_layers'])
-    input_layers = [all_layers[i-1] for i in input_indexes]
+    input_layers = [all_layers[i - 1] for i in input_indexes]
 
     output_indexes = _handle_shape(config['output_layers'])
-    output_layers = [all_layers[i-1] for i in output_indexes]
+    output_layers = [all_layers[i - 1] for i in output_indexes]
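+    # config layer indexes are 1-based, hence the i - 1 when indexing all_layers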
 
     return Model(inputs=input_layers, outputs=output_layers)
 
@@ -300,8 +300,7 @@
         options.update((inputs['mode_selection']['compile_params']
                         ['optimizer_selection']['optimizer_options']))
 
-        train_metrics = (inputs['mode_selection']['compile_params']
-                         ['metrics']).split(',')
+        train_metrics = inputs['mode_selection']['compile_params']['metrics']
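+        # metrics now arrives as a list; a trailing 'none' placeholder is stripped below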
         if train_metrics[-1] == 'none':
             train_metrics = train_metrics[:-1]
         options['metrics'] = train_metrics
diff -r 7068b5fcd623 -r 1e99cfb71f40 keras_train_and_eval.py
--- a/keras_train_and_eval.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/keras_train_and_eval.py	Tue Apr 13 17:52:15 2021 +0000
@@ -10,7 +10,6 @@
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 from sklearn.metrics.scorer import _check_multimetric_scoring
-from sklearn import model_selection
 from sklearn.model_selection._validation import _score
 from sklearn.model_selection import _search, _validation
 from sklearn.utils import indexable, safe_indexing
@@ -18,39 +17,49 @@
 from galaxy_ml.externals.selene_sdk.utils import compute_score
 from galaxy_ml.model_validations import train_test_split
 from galaxy_ml.keras_galaxy_models import _predict_generator
-from galaxy_ml.utils import (SafeEval, get_scoring, load_model,
-                             read_columns, try_get_attr, get_module,
-                             clean_params, get_main_estimator)
+from galaxy_ml.utils import (
+    SafeEval,
+    get_scoring,
+    load_model,
+    read_columns,
+    try_get_attr,
+    get_module,
+    clean_params,
+    get_main_estimator,
+)
 
 
-_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
-setattr(_search, '_fit_and_score', _fit_and_score)
-setattr(_validation, '_fit_and_score', _fit_and_score)
+_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
+setattr(_search, "_fit_and_score", _fit_and_score)
+setattr(_validation, "_fit_and_score", _fit_and_score)
 
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
-CACHE_DIR = os.path.join(os.getcwd(), 'cached')
+N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
+CACHE_DIR = os.path.join(os.getcwd(), "cached")
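+# N_JOBS follows GALAXY_SLOTS (Galaxy's CPU allocation); CACHE_DIR backs the joblib fit cache used below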
 del os
-NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
-                  'nthread', 'callbacks')
-ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
-                     'CSVLogger', 'None')
+NON_SEARCHABLE = ("n_jobs", "pre_dispatch", "memory", "_path", "nthread", "callbacks")
+ALLOWED_CALLBACKS = (
+    "EarlyStopping",
+    "TerminateOnNaN",
+    "ReduceLROnPlateau",
+    "CSVLogger",
+    "None",
+)
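+# hyperparameters whose names end with a NON_SEARCHABLE entry are excluded from swapping (see _eval_swap_params)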
 
 
 def _eval_swap_params(params_builder):
     swap_params = {}
 
-    for p in params_builder['param_set']:
-        swap_value = p['sp_value'].strip()
-        if swap_value == '':
+    for p in params_builder["param_set"]:
+        swap_value = p["sp_value"].strip()
+        if swap_value == "":
             continue
 
-        param_name = p['sp_name']
+        param_name = p["sp_name"]
         if param_name.lower().endswith(NON_SEARCHABLE):
-            warnings.warn("Warning: `%s` is not eligible for search and was "
-                          "omitted!" % param_name)
+            warnings.warn("Warning: `%s` is not eligible for search and was " "omitted!" % param_name)
             continue
 
-        if not swap_value.startswith(':'):
+        if not swap_value.startswith(":"):
             safe_eval = SafeEval(load_scipy=True, load_numpy=True)
             ev = safe_eval(swap_value)
         else:
@@ -77,34 +86,31 @@
         else:
             new_arrays.append(arr)
 
-    if kwargs['shuffle'] == 'None':
-        kwargs['shuffle'] = None
+    if kwargs["shuffle"] == "None":
+        kwargs["shuffle"] = None
 
-    group_names = kwargs.pop('group_names', None)
+    group_names = kwargs.pop("group_names", None)
 
     if group_names is not None and group_names.strip():
-        group_names = [name.strip() for name in
-                       group_names.split(',')]
+        group_names = [name.strip() for name in group_names.split(",")]
         new_arrays = indexable(*new_arrays)
-        groups = kwargs['labels']
+        groups = kwargs["labels"]
         n_samples = new_arrays[0].shape[0]
         index_arr = np.arange(n_samples)
         test = index_arr[np.isin(groups, group_names)]
         train = index_arr[~np.isin(groups, group_names)]
-        rval = list(chain.from_iterable(
-            (safe_indexing(a, train),
-             safe_indexing(a, test)) for a in new_arrays))
+        rval = list(chain.from_iterable((safe_indexing(a, train), safe_indexing(a, test)) for a in new_arrays))
     else:
         rval = train_test_split(*new_arrays, **kwargs)
 
     for pos in nones:
-        rval[pos * 2: 2] = [None, None]
+        rval[pos * 2 : pos * 2] = [None, None]
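+        # re-insert a (None, None) train/test pair at each omitted input's position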
 
     return rval
 
 
 def _evaluate(y_true, pred_probas, scorer, is_multimetric=True):
-    """ output scores based on input scorer
+    """output scores based on input scorer
 
     Parameters
     ----------
@@ -118,52 +124,55 @@
     """
     if y_true.ndim == 1 or y_true.shape[-1] == 1:
         pred_probas = pred_probas.ravel()
-        pred_labels = (pred_probas > 0.5).astype('int32')
-        targets = y_true.ravel().astype('int32')
+        pred_labels = (pred_probas > 0.5).astype("int32")
+        targets = y_true.ravel().astype("int32")
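+        # scorers that need hard labels (_PredictScorer) use the 0.5-thresholded labels; others take probabilities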
         if not is_multimetric:
-            preds = pred_labels if scorer.__class__.__name__ == \
-                '_PredictScorer' else pred_probas
+            preds = pred_labels if scorer.__class__.__name__ == "_PredictScorer" else pred_probas
             score = scorer._score_func(targets, preds, **scorer._kwargs)
 
             return score
         else:
             scores = {}
             for name, one_scorer in scorer.items():
-                preds = pred_labels if one_scorer.__class__.__name__\
-                    == '_PredictScorer' else pred_probas
-                score = one_scorer._score_func(targets, preds,
-                                               **one_scorer._kwargs)
+                preds = pred_labels if one_scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+                score = one_scorer._score_func(targets, preds, **one_scorer._kwargs)
                 scores[name] = score
 
     # TODO: multi-class metrics
     # multi-label
     else:
-        pred_labels = (pred_probas > 0.5).astype('int32')
-        targets = y_true.astype('int32')
+        pred_labels = (pred_probas > 0.5).astype("int32")
+        targets = y_true.astype("int32")
         if not is_multimetric:
-            preds = pred_labels if scorer.__class__.__name__ == \
-                '_PredictScorer' else pred_probas
-            score, _ = compute_score(preds, targets,
-                                     scorer._score_func)
+            preds = pred_labels if scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+            score, _ = compute_score(preds, targets, scorer._score_func)
             return score
         else:
             scores = {}
             for name, one_scorer in scorer.items():
-                preds = pred_labels if one_scorer.__class__.__name__\
-                    == '_PredictScorer' else pred_probas
-                score, _ = compute_score(preds, targets,
-                                         one_scorer._score_func)
+                preds = pred_labels if one_scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+                score, _ = compute_score(preds, targets, one_scorer._score_func)
                 scores[name] = score
 
     return scores
 
 
-def main(inputs, infile_estimator, infile1, infile2,
-         outfile_result, outfile_object=None,
-         outfile_weights=None, outfile_y_true=None,
-         outfile_y_preds=None, groups=None,
-         ref_seq=None, intervals=None, targets=None,
-         fasta_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    infile1,
+    infile2,
+    outfile_result,
+    outfile_object=None,
+    outfile_weights=None,
+    outfile_y_true=None,
+    outfile_y_preds=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+):
     """
     Parameters
     ----------
@@ -209,19 +218,19 @@
     fasta_path : str
         File path to dataset containing fasta file
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     #  load estimator
-    with open(infile_estimator, 'rb') as estimator_handler:
+    with open(infile_estimator, "rb") as estimator_handler:
         estimator = load_model(estimator_handler)
 
     estimator = clean_params(estimator)
 
     # swap hyperparameter
-    swapping = params['experiment_schemes']['hyperparams_swapping']
+    swapping = params["experiment_schemes"]["hyperparams_swapping"]
     swap_params = _eval_swap_params(swapping)
     estimator.set_params(**swap_params)
 
@@ -230,38 +239,39 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # fasta_file input
-    elif input_type == 'seq_fasta':
-        pyfaidx = get_module('pyfaidx')
+    elif input_type == "seq_fasta":
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         for param in estimator_params.keys():
-            if param.endswith('fasta_path'):
-                estimator.set_params(
-                    **{param: fasta_path})
+            if param.endswith("fasta_path"):
+                estimator.set_params(**{param: fasta_path})
                 break
         else:
             raise ValueError(
@@ -270,25 +280,29 @@
                 "KerasGBatchClassifier with "
                 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
                 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
-                "in pipeline!")
+                "in pipeline!"
+            )
 
-    elif input_type == 'refseq_and_interval':
+    elif input_type == "refseq_and_interval":
         path_params = {
-            'data_batch_generator__ref_genome_path': ref_seq,
-            'data_batch_generator__intervals_path': intervals,
-            'data_batch_generator__target_path': targets
+            "data_batch_generator__ref_genome_path": ref_seq,
+            "data_batch_generator__intervals_path": intervals,
+            "data_batch_generator__target_path": targets,
         }
         estimator.set_params(**path_params)
         n_intervals = sum(1 for line in open(intervals))
         X = np.arange(n_intervals)[:, np.newaxis]
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -296,37 +310,35 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
-    if input_type == 'refseq_and_interval':
-        estimator.set_params(
-            data_batch_generator__features=y.ravel().tolist())
+    if input_type == "refseq_and_interval":
+        estimator.set_params(data_batch_generator__features=y.ravel().tolist())
         y = None
     # end y
 
     # load groups
     if groups:
-        groups_selector = (params['experiment_schemes']['test_split']
-                                 ['split_algos']).pop('groups_selector')
+        groups_selector = (params["experiment_schemes"]["test_split"]["split_algos"]).pop("groups_selector")
 
-        header = 'infer' if groups_selector['header_g'] else None
-        column_option = \
-            (groups_selector['column_selector_options_g']
-                            ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = groups_selector['column_selector_options_g']['col_g']
+        header = "infer" if groups_selector["header_g"] else None
+        column_option = groups_selector["column_selector_options_g"]["selected_column_selector_option_g"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = groups_selector["column_selector_options_g"]["col_g"]
         else:
             c = None
 
@@ -334,13 +346,12 @@
         if df_key in loaded_df:
             groups = loaded_df[df_key]
 
-        groups = read_columns(
-                groups,
-                c=c,
-                c_option=column_option,
-                sep='\t',
-                header=header,
-                parse_dates=True)
+        groups = read_columns(groups, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True)
         groups = groups.ravel()
 
     # del loaded_df
@@ -349,86 +360,99 @@
     # cache iraps_core fits could increase search speed significantly
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
     main_est = get_main_estimator(estimator)
-    if main_est.__class__.__name__ == 'IRAPSClassifier':
+    if main_est.__class__.__name__ == "IRAPSClassifier":
         main_est.set_params(memory=memory)
 
     # handle scorer, convert to scorer dict
     scoring = params['experiment_schemes']['metrics']['scoring']
+    if scoring is not None:
+        # get_scoring() expects secondary_scoring to be a comma-separated string (not a list)
+        # Check if secondary_scoring is specified
+        secondary_scoring = scoring.get("secondary_scoring", None)
+        if secondary_scoring is not None:
+            # If secondary_scoring is specified, convert the list into a comma-separated string
+            scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
+
     scorer = get_scoring(scoring)
     scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
 
     # handle test (first) split
-    test_split_options = (params['experiment_schemes']
-                                ['test_split']['split_algos'])
+    test_split_options = params["experiment_schemes"]["test_split"]["split_algos"]
 
-    if test_split_options['shuffle'] == 'group':
-        test_split_options['labels'] = groups
-    if test_split_options['shuffle'] == 'stratified':
+    if test_split_options["shuffle"] == "group":
+        test_split_options["labels"] = groups
+    if test_split_options["shuffle"] == "stratified":
         if y is not None:
-            test_split_options['labels'] = y
+            test_split_options["labels"] = y
         else:
-            raise ValueError("Stratified shuffle split is not "
-                             "applicable on empty target values!")
+            raise ValueError("Stratified shuffle split is not " "applicable on empty target values!")
 
-    X_train, X_test, y_train, y_test, groups_train, groups_test = \
-        train_test_split_none(X, y, groups, **test_split_options)
+    (
+        X_train,
+        X_test,
+        y_train,
+        y_test,
+        groups_train,
+        _groups_test,
+    ) = train_test_split_none(X, y, groups, **test_split_options)
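+    # the underscore-prefixed name marks the test-split group labels as intentionally unused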
 
-    exp_scheme = params['experiment_schemes']['selected_exp_scheme']
+    exp_scheme = params["experiment_schemes"]["selected_exp_scheme"]
 
     # handle validation (second) split
-    if exp_scheme == 'train_val_test':
-        val_split_options = (params['experiment_schemes']
-                                   ['val_split']['split_algos'])
+    if exp_scheme == "train_val_test":
+        val_split_options = params["experiment_schemes"]["val_split"]["split_algos"]
 
-        if val_split_options['shuffle'] == 'group':
-            val_split_options['labels'] = groups_train
-        if val_split_options['shuffle'] == 'stratified':
+        if val_split_options["shuffle"] == "group":
+            val_split_options["labels"] = groups_train
+        if val_split_options["shuffle"] == "stratified":
             if y_train is not None:
-                val_split_options['labels'] = y_train
+                val_split_options["labels"] = y_train
             else:
-                raise ValueError("Stratified shuffle split is not "
-                                 "applicable on empty target values!")
+                raise ValueError("Stratified shuffle split is not " "applicable on empty target values!")
 
-        X_train, X_val, y_train, y_val, groups_train, groups_val = \
-            train_test_split_none(X_train, y_train, groups_train,
-                                  **val_split_options)
+        (
+            X_train,
+            X_val,
+            y_train,
+            y_val,
+            groups_train,
+            _groups_val,
+        ) = train_test_split_none(X_train, y_train, groups_train, **val_split_options)
 
     # train and eval
-    if hasattr(estimator, 'validation_data'):
-        if exp_scheme == 'train_val_test':
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_val, y_val))
+    if hasattr(estimator, "validation_data"):
+        if exp_scheme == "train_val_test":
+            estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
         else:
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_test, y_test))
+            estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
     else:
         estimator.fit(X_train, y_train)
 
-    if hasattr(estimator, 'evaluate'):
+    if hasattr(estimator, "evaluate"):
         steps = estimator.prediction_steps
         batch_size = estimator.batch_size
-        generator = estimator.data_generator_.flow(X_test, y=y_test,
-                                                   batch_size=batch_size)
-        predictions, y_true = _predict_generator(estimator.model_, generator,
-                                                 steps=steps)
+        generator = estimator.data_generator_.flow(X_test, y=y_test, batch_size=batch_size)
+        predictions, y_true = _predict_generator(estimator.model_, generator, steps=steps)
         scores = _evaluate(y_true, predictions, scorer, is_multimetric=True)
 
     else:
-        if hasattr(estimator, 'predict_proba'):
+        if hasattr(estimator, "predict_proba"):
             predictions = estimator.predict_proba(X_test)
         else:
             predictions = estimator.predict(X_test)
 
         y_true = y_test
-        scores = _score(estimator, X_test, y_test, scorer,
-                        is_multimetric=True)
+        scores = _score(estimator, X_test, y_test, scorer, is_multimetric=True)
     if outfile_y_true:
         try:
-            pd.DataFrame(y_true).to_csv(outfile_y_true, sep='\t',
-                                        index=False)
+            pd.DataFrame(y_true).to_csv(outfile_y_true, sep="\t", index=False)
             pd.DataFrame(predictions).astype(np.float32).to_csv(
-                outfile_y_preds, sep='\t', index=False,
-                float_format='%g', chunksize=10000)
+                outfile_y_preds,
+                sep="\t",
+                index=False,
+                float_format="%g",
+                chunksize=10000,
+            )
         except Exception as e:
             print("Error in saving predictions: %s" % e)
 
@@ -437,8 +461,7 @@
         scores[name] = [score]
     df = pd.DataFrame(scores)
     df = df[sorted(df.columns)]
-    df.to_csv(path_or_buf=outfile_result, sep='\t',
-              header=True, index=False)
+    df.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
 
     memory.clear(warn=False)
 
@@ -447,23 +470,22 @@
         if isinstance(estimator, Pipeline):
             main_est = estimator.steps[-1][-1]
 
-        if hasattr(main_est, 'model_') \
-                and hasattr(main_est, 'save_weights'):
+        if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
             if outfile_weights:
                 main_est.save_weights(outfile_weights)
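+            # remove attributes that do not pickle cleanly before dumping the estimator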
             del main_est.model_
             del main_est.fit_params
             del main_est.model_class_
-            del main_est.validation_data
-            if getattr(main_est, 'data_generator_', None):
+            if getattr(main_est, "validation_data", None):
+                del main_est.validation_data
+            if getattr(main_est, "data_generator_", None):
                 del main_est.data_generator_
 
-        with open(outfile_object, 'wb') as output_handler:
-            pickle.dump(estimator, output_handler,
-                        pickle.HIGHEST_PROTOCOL)
+        with open(outfile_object, "wb") as output_handler:
+            pickle.dump(estimator, output_handler, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -481,11 +503,19 @@
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         outfile_weights=args.outfile_weights,
-         outfile_y_true=args.outfile_y_true,
-         outfile_y_preds=args.outfile_y_preds,
-         groups=args.groups,
-         ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        outfile_weights=args.outfile_weights,
+        outfile_y_true=args.outfile_y_true,
+        outfile_y_preds=args.outfile_y_preds,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+    )
diff -r 7068b5fcd623 -r 1e99cfb71f40 main_macros.xml
--- a/main_macros.xml	Thu Oct 01 20:27:36 2020 +0000
+++ b/main_macros.xml	Tue Apr 13 17:52:15 2021 +0000
@@ -1,1952 +1,1940 @@
 
-  1.0.8.2
+    1.0.8.3
 
 [The remainder of this main_macros.xml hunk is unrecoverable: the XML markup was stripped during extraction, leaving only diff prefixes and indentation. The surviving fragments indicate a whole-file re-indent from 2-space to 4-space, the version token bump from 1.0.8.2 to 1.0.8.3 shown above, and a requirements macro that keeps the Galaxy-ML package while dropping the bare python entry.]
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+            
+            
+        
+        
+    
 
-  
-      
-        
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
+    
+        
+            
+                
+            
+            
+        
+    
+
+    
+        
+            
+                
+                    
+                
+            
+            
+                
+                    
+                
+            
+        
+    
+
+    
+        
+            
+                
+                    
+                
+            
         
-        
-      
-      
-  
-
-  
-    
-      
-        
-      
-      
-    
-  
-
-  
-    
-      
-        
-            
-        
-      
-      
-        
-            
-        
-      
-    
-  
+    
 
-  
-    
-      
-        
-          
-        
-      
-    
-  
-
-  
-    
-      
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
-    
-  
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-            
-                
+    
+        
+            
+                
+                    
+                
+            
+            
+                
+                    
+                
             
-        
-        
-            
-                
-            
-        
-    
-  
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-            
-            
-        
-        
-            
-        
-        
-            
-        
-    
-  
+    
+        
+            
+                
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-    
-  
+    
+        
+        
+    
 
-  
+    
 
-  
-    
-      
-          selected_tasks['selected_task'] == 'load'
-      
-      
-          selected_tasks['selected_task'] == 'train'
-      
-    
-  
+    
+        
+            
+                selected_tasks['selected_task'] == 'load'
+            
+            
+                selected_tasks['selected_task'] == 'train'
+            
+        
+    
 
-  
-  
-    
-        10.5281/zenodo.15094
-    
-  
+    
+    
+        
+            10.5281/zenodo.15094
+        
+    
 
-  
-    
-        
-          @article{scikit-learn,
-            title={Scikit-learn: Machine Learning in {P}ython},
-            author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+    
+        
+            
+          @article{scikit-learn, title={Scikit-learn: Machine Learning in {P}ython}, author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
                     and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
                     and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
-                    Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
-            journal={Journal of Machine Learning Research},
-            volume={12},
-            pages={2825--2830},
-            year={2011}
+                    Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, journal={Journal of Machine Learning Research}, volume={12}, pages={2825--2830}, year={2011}
           }
-        
-        
-    
-  
+            
+            
+        
+    
 
-  
-    
-        
+    
+        
+            
           @Misc{,
           author =    {Eric Jones and Travis Oliphant and Pearu Peterson and others},
           title =     {{SciPy}: Open source scientific tools for {Python}},
@@ -1954,12 +1942,12 @@
           url = "http://www.scipy.org/",
           note = {[Online; accessed 2016-04-09]}
         }
-        
-    
-  
+            
+        
+    
 
-  
-    
+    
+        
       @article{DBLP:journals/corr/abs-1711-08477,
         author    = {Ryan J. Urbanowicz and
                     Randal S. Olson and
@@ -1977,11 +1965,11 @@
         biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1711-08477},
         bibsource = {dblp computer science bibliography, https://dblp.org}
       }
-    
-  
+        
+    
 
-  
-    
+    
+        
       @inproceedings{Chen:2016:XST:2939672.2939785,
         author = {Chen, Tianqi and Guestrin, Carlos},
         title = {{XGBoost}: A Scalable Tree Boosting System},
@@ -1999,11 +1987,11 @@
         address = {New York, NY, USA},
         keywords = {large-scale machine learning},
       }
-    
-  
+        
+    
 
-  
-    
+    
+        
       @article{JMLR:v18:16-365,
         author  = {Guillaume  Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
         title   = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
@@ -2014,22 +2002,14 @@
         pages   = {1-5},
         url     = {http://jmlr.org/papers/v18/16-365.html}
       }
-    
-  
+        
+    
 
-  
-    
-      @article{chen2019selene,
-        title={Selene: a PyTorch-based deep learning library for sequence data},
-        author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G},
-        journal={Nature methods},
-        volume={16},
-        number={4},
-        pages={315},
-        year={2019},
-        publisher={Nature Publishing Group}
+    
+        
+      @article{chen2019selene, title={Selene: a PyTorch-based deep learning library for sequence data}, author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G}, journal={Nature methods}, volume={16}, number={4}, pages={315}, year={2019}, publisher={Nature Publishing Group}
       }
-    
-  
+        
+    
 
 
diff -r 7068b5fcd623 -r 1e99cfb71f40 ml_visualization_ex.py
--- a/ml_visualization_ex.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/ml_visualization_ex.py	Tue Apr 13 17:52:15 2021 +0000
@@ -22,16 +22,16 @@
 
 # plotly default colors
 default_colors = [
-    '#1f77b4',  # muted blue
-    '#ff7f0e',  # safety orange
-    '#2ca02c',  # cooked asparagus green
-    '#d62728',  # brick red
-    '#9467bd',  # muted purple
-    '#8c564b',  # chestnut brown
-    '#e377c2',  # raspberry yogurt pink
-    '#7f7f7f',  # middle gray
-    '#bcbd22',  # curry yellow-green
-    '#17becf'   # blue-teal
+    "#1f77b4",  # muted blue
+    "#ff7f0e",  # safety orange
+    "#2ca02c",  # cooked asparagus green
+    "#d62728",  # brick red
+    "#9467bd",  # muted purple
+    "#8c564b",  # chestnut brown
+    "#e377c2",  # raspberry yogurt pink
+    "#7f7f7f",  # middle gray
+    "#bcbd22",  # curry yellow-green
+    "#17becf",  # blue-teal
 ]
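
Traces index into this palette modulo its length, so any number of curves can be drawn; a quick hypothetical illustration of the cycling used by the plotting functions below:

    # the 11th trace wraps back around to the first color
    for idx in range(11):
        color = default_colors[idx % len(default_colors)]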
 
 
@@ -52,46 +52,31 @@
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        precision, recall, _ = precision_recall_curve(
-            y_true, y_score, pos_label=pos_label)
-        ap = average_precision_score(
-            y_true, y_score, pos_label=pos_label or 1)
+        precision, recall, _ = precision_recall_curve(y_true, y_score, pos_label=pos_label)
+        ap = average_precision_score(y_true, y_score, pos_label=pos_label or 1)
 
         trace = go.Scatter(
             x=recall,
             y=precision,
-            mode='lines',
-            marker=dict(
-                color=default_colors[idx % len(default_colors)]
-            ),
-            name='%s (area = %.3f)' % (idx, ap)
+            mode="lines",
+            marker=dict(color=default_colors[idx % len(default_colors)]),
+            name="%s (area = %.3f)" % (idx, ap),
         )
         data.append(trace)
 
     layout = go.Layout(
-        xaxis=dict(
-            title='Recall',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
-        yaxis=dict(
-            title='Precision',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
+        xaxis=dict(title="Recall", linecolor="lightslategray", linewidth=1),
+        yaxis=dict(title="Precision", linecolor="lightslategray", linewidth=1),
         title=dict(
-            text=title or 'Precision-Recall Curve',
+            text=title or "Precision-Recall Curve",
             x=0.5,
             y=0.92,
-            xanchor='center',
-            yanchor='top'
+            xanchor="center",
+            yanchor="top",
         ),
-        font=dict(
-            family="sans-serif",
-            size=11
-        ),
+        font=dict(family="sans-serif", size=11),
         # control background colors
-        plot_bgcolor='rgba(255,255,255,0)'
+        plot_bgcolor="rgba(255,255,255,0)",
     )
     """
     legend=dict(
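
For reference, the per-column computation in visualize_pr_curve_plotly reduces to two scikit-learn calls; a minimal self-contained sketch with synthetic labels and scores (not the tool's data):

    import numpy as np
    from sklearn.metrics import average_precision_score, precision_recall_curve

    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])
    # precision/recall pairs for every score threshold, plus the thresholds
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    # area summary reported in the trace name above
    ap = average_precision_score(y_true, y_score)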
@@ -112,45 +97,47 @@
 
     plotly.offline.plot(fig, filename="output.html", auto_open=False)
     # to be discovered by `from_work_dir`
-    os.rename('output.html', 'output')
+    os.rename("output.html", "output")
 
 
 def visualize_pr_curve_matplotlib(df1, df2, pos_label, title=None):
-    """visualize pr-curve using matplotlib and output svg image
-    """
+    """visualize pr-curve using matplotlib and output svg image"""
     backend = matplotlib.get_backend()
     if "inline" not in backend:
         matplotlib.use("SVG")
-    plt.style.use('seaborn-colorblind')
+    plt.style.use("seaborn-colorblind")
     plt.figure()
 
     for idx in range(df1.shape[1]):
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        precision, recall, _ = precision_recall_curve(
-            y_true, y_score, pos_label=pos_label)
-        ap = average_precision_score(
-            y_true, y_score, pos_label=pos_label or 1)
+        precision, recall, _ = precision_recall_curve(y_true, y_score, pos_label=pos_label)
+        ap = average_precision_score(y_true, y_score, pos_label=pos_label or 1)
 
-        plt.step(recall, precision, 'r-', color="black", alpha=0.3,
-                 lw=1, where="post", label='%s (area = %.3f)' % (idx, ap))
+        plt.step(
+            recall,
+            precision,
+            "r-",
+            color="black",
+            alpha=0.3,
+            lw=1,
+            where="post",
+            label="%s (area = %.3f)" % (idx, ap),
+        )
 
     plt.xlim([0.0, 1.0])
     plt.ylim([0.0, 1.05])
-    plt.xlabel('Recall')
-    plt.ylabel('Precision')
-    title = title or 'Precision-Recall Curve'
+    plt.xlabel("Recall")
+    plt.ylabel("Precision")
+    title = title or "Precision-Recall Curve"
     plt.title(title)
     folder = os.getcwd()
     plt.savefig(os.path.join(folder, "output.svg"), format="svg")
-    os.rename(os.path.join(folder, "output.svg"),
-              os.path.join(folder, "output"))
+    os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))
 
 
-def visualize_roc_curve_plotly(df1, df2, pos_label,
-                               drop_intermediate=True,
-                               title=None):
+def visualize_roc_curve_plotly(df1, df2, pos_label, drop_intermediate=True, title=None):
     """output roc-curve in html using plotly
 
     df1 : pandas.DataFrame
@@ -169,45 +156,31 @@
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label,
-                                drop_intermediate=drop_intermediate)
+        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, drop_intermediate=drop_intermediate)
         roc_auc = auc(fpr, tpr)
 
         trace = go.Scatter(
             x=fpr,
             y=tpr,
-            mode='lines',
-            marker=dict(
-                color=default_colors[idx % len(default_colors)]
-            ),
-            name='%s (area = %.3f)' % (idx, roc_auc)
+            mode="lines",
+            marker=dict(color=default_colors[idx % len(default_colors)]),
+            name="%s (area = %.3f)" % (idx, roc_auc),
         )
         data.append(trace)
 
     layout = go.Layout(
-        xaxis=dict(
-            title='False Positive Rate',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
-        yaxis=dict(
-            title='True Positive Rate',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
+        xaxis=dict(title="False Positive Rate", linecolor="lightslategray", linewidth=1),
+        yaxis=dict(title="True Positive Rate", linecolor="lightslategray", linewidth=1),
         title=dict(
-            text=title or 'Receiver Operating Characteristic (ROC) Curve',
+            text=title or "Receiver Operating Characteristic (ROC) Curve",
             x=0.5,
             y=0.92,
-            xanchor='center',
-            yanchor='top'
+            xanchor="center",
+            yanchor="top",
         ),
-        font=dict(
-            family="sans-serif",
-            size=11
-        ),
+        font=dict(family="sans-serif", size=11),
         # control background colors
-        plot_bgcolor='rgba(255,255,255,0)'
+        plot_bgcolor="rgba(255,255,255,0)",
     )
     """
     # legend=dict(
@@ -229,66 +202,84 @@
 
     plotly.offline.plot(fig, filename="output.html", auto_open=False)
     # to be discovered by `from_work_dir`
-    os.rename('output.html', 'output')
+    os.rename("output.html", "output")
 
 
-def visualize_roc_curve_matplotlib(df1, df2, pos_label,
-                                   drop_intermediate=True,
-                                   title=None):
-    """visualize roc-curve using matplotlib and output svg image
-    """
+def visualize_roc_curve_matplotlib(df1, df2, pos_label, drop_intermediate=True, title=None):
+    """visualize roc-curve using matplotlib and output svg image"""
     backend = matplotlib.get_backend()
     if "inline" not in backend:
         matplotlib.use("SVG")
-    plt.style.use('seaborn-colorblind')
+    plt.style.use("seaborn-colorblind")
     plt.figure()
 
     for idx in range(df1.shape[1]):
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label,
-                                drop_intermediate=drop_intermediate)
+        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, drop_intermediate=drop_intermediate)
         roc_auc = auc(fpr, tpr)
 
-        plt.step(fpr, tpr, 'r-', color="black", alpha=0.3, lw=1,
-                 where="post", label='%s (area = %.3f)' % (idx, roc_auc))
+        plt.step(
+            fpr,
+            tpr,
+            "r-",
+            color="black",
+            alpha=0.3,
+            lw=1,
+            where="post",
+            label="%s (area = %.3f)" % (idx, roc_auc),
+        )
 
     plt.xlim([0.0, 1.0])
     plt.ylim([0.0, 1.05])
-    plt.xlabel('False Positive Rate')
-    plt.ylabel('True Positive Rate')
-    title = title or 'Receiver Operating Characteristic (ROC) Curve'
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    title = title or "Receiver Operating Characteristic (ROC) Curve"
     plt.title(title)
     folder = os.getcwd()
     plt.savefig(os.path.join(folder, "output.svg"), format="svg")
-    os.rename(os.path.join(folder, "output.svg"),
-              os.path.join(folder, "output"))
+    os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))
 
 
 def get_dataframe(file_path, plot_selection, header_name, column_name):
-    header = 'infer' if plot_selection[header_name] else None
+    header = "infer" if plot_selection[header_name] else None
     column_option = plot_selection[column_name]["selected_column_selector_option"]
-    if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
         col = plot_selection[column_name]["col1"]
     else:
         col = None
     _, input_df = read_columns(file_path, c=col,
-                                   c_option=column_option,
-                                   return_df=True,
-                                   sep='\t', header=header,
-                                   parse_dates=True)
+                               c_option=column_option,
+                               return_df=True,
+                               sep='\t', header=header,
+                               parse_dates=True)
     return input_df
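
get_dataframe wraps the galaxy_ml read_columns helper; a hedged usage sketch mirroring the call made later by the confusion-matrix branch (the file name and selection dict here are hypothetical):

    plot_selection = {
        "header_true": True,
        "column_selector_options_true": {
            "selected_column_selector_option": "by_index_number",
            "col1": [1],
        },
    }
    labels_df = get_dataframe(
        "true_labels.tsv", plot_selection,
        "header_true", "column_selector_options_true",
    )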
 
 
-def main(inputs, infile_estimator=None, infile1=None,
-         infile2=None, outfile_result=None,
-         outfile_object=None, groups=None,
-         ref_seq=None, intervals=None,
-         targets=None, fasta_path=None,
-         model_config=None, true_labels=None,
-         predicted_labels=None, plot_color=None,
-         title=None):
+def main(
+    inputs,
+    infile_estimator=None,
+    infile1=None,
+    infile2=None,
+    outfile_result=None,
+    outfile_object=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+    model_config=None,
+    true_labels=None,
+    predicted_labels=None,
+    plot_color=None,
+    title=None,
+):
     """
     Parameters
     ----------
@@ -341,34 +332,39 @@
     title : str, default is None
         Title of the confusion matrix heatmap
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
-    title = params['plotting_selection']['title'].strip()
-    plot_type = params['plotting_selection']['plot_type']
-    plot_format = params['plotting_selection']['plot_format']
+    title = params["plotting_selection"]["title"].strip()
+    plot_type = params["plotting_selection"]["plot_type"]
+    plot_format = params["plotting_selection"]["plot_format"]
 
-    if plot_type == 'feature_importances':
-        with open(infile_estimator, 'rb') as estimator_handler:
+    if plot_type == "feature_importances":
+        with open(infile_estimator, "rb") as estimator_handler:
             estimator = load_model(estimator_handler)
 
-        column_option = (params['plotting_selection']
-                               ['column_selector_options']
-                               ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = (params['plotting_selection']
-                       ['column_selector_options']['col1'])
+        column_option = params["plotting_selection"]["column_selector_options"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["plotting_selection"]["column_selector_options"]["col1"]
         else:
             c = None
 
-        _, input_df = read_columns(infile1, c=c,
-                                   c_option=column_option,
-                                   return_df=True,
-                                   sep='\t', header='infer',
-                                   parse_dates=True)
+        _, input_df = read_columns(
+            infile1,
+            c=c,
+            c_option=column_option,
+            return_df=True,
+            sep="\t",
+            header="infer",
+            parse_dates=True,
+        )
 
         feature_names = input_df.columns.values
 
@@ -379,16 +375,14 @@
                     feature_names = feature_names[mask]
             estimator = estimator.steps[-1][-1]
 
-        if hasattr(estimator, 'coef_'):
+        if hasattr(estimator, "coef_"):
             coefs = estimator.coef_
         else:
-            coefs = getattr(estimator, 'feature_importances_', None)
+            coefs = getattr(estimator, "feature_importances_", None)
         if coefs is None:
-            raise RuntimeError('The classifier does not expose '
-                               '"coef_" or "feature_importances_" '
-                               'attributes')
+            raise RuntimeError("The classifier does not expose " '"coef_" or "feature_importances_" ' "attributes")
 
-        threshold = params['plotting_selection']['threshold']
+        threshold = params["plotting_selection"]["threshold"]
         if threshold is not None:
             mask = (coefs > threshold) | (coefs < -threshold)
             coefs = coefs[mask]
@@ -397,80 +391,74 @@
         # sort
         indices = np.argsort(coefs)[::-1]
 
-        trace = go.Bar(x=feature_names[indices],
-                       y=coefs[indices])
+        trace = go.Bar(x=feature_names[indices], y=coefs[indices])
         layout = go.Layout(title=title or "Feature Importances")
         fig = go.Figure(data=[trace], layout=layout)
 
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
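
The coefficient lookup above generalizes to any fitted estimator; a minimal sketch of the same unwrap-then-probe logic, assuming only the attribute conventions scikit-learn documents (coef_ on linear models, feature_importances_ on tree ensembles):

    def get_coefs(estimator):
        # unwrap a Pipeline down to its final step
        if hasattr(estimator, "steps"):
            estimator = estimator.steps[-1][-1]
        if hasattr(estimator, "coef_"):
            return estimator.coef_
        # None when the estimator exposes neither attribute
        return getattr(estimator, "feature_importances_", None)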
 
-    elif plot_type in ('pr_curve', 'roc_curve'):
-        df1 = pd.read_csv(infile1, sep='\t', header='infer')
-        df2 = pd.read_csv(infile2, sep='\t', header='infer').astype(np.float32)
+    elif plot_type in ("pr_curve", "roc_curve"):
+        df1 = pd.read_csv(infile1, sep="\t", header="infer")
+        df2 = pd.read_csv(infile2, sep="\t", header="infer").astype(np.float32)
 
-        minimum = params['plotting_selection']['report_minimum_n_positives']
+        minimum = params["plotting_selection"]["report_minimum_n_positives"]
         # filter out columns whose n_positives is below the threshold
         if minimum:
             mask = df1.sum(axis=0) >= minimum
             df1 = df1.loc[:, mask]
             df2 = df2.loc[:, mask]
 
-        pos_label = params['plotting_selection']['pos_label'].strip() \
-            or None
+        pos_label = params["plotting_selection"]["pos_label"].strip() or None
 
-        if plot_type == 'pr_curve':
-            if plot_format == 'plotly_html':
+        if plot_type == "pr_curve":
+            if plot_format == "plotly_html":
                 visualize_pr_curve_plotly(df1, df2, pos_label, title=title)
             else:
                 visualize_pr_curve_matplotlib(df1, df2, pos_label, title)
-        else:          # 'roc_curve'
-            drop_intermediate = (params['plotting_selection']
-                                       ['drop_intermediate'])
-            if plot_format == 'plotly_html':
-                visualize_roc_curve_plotly(df1, df2, pos_label,
-                                           drop_intermediate=drop_intermediate,
-                                           title=title)
+        else:  # 'roc_curve'
+            drop_intermediate = params["plotting_selection"]["drop_intermediate"]
+            if plot_format == "plotly_html":
+                visualize_roc_curve_plotly(
+                    df1,
+                    df2,
+                    pos_label,
+                    drop_intermediate=drop_intermediate,
+                    title=title,
+                )
             else:
                 visualize_roc_curve_matplotlib(
-                    df1, df2, pos_label,
+                    df1,
+                    df2,
+                    pos_label,
                     drop_intermediate=drop_intermediate,
-                    title=title)
+                    title=title,
+                )
 
         return 0
 
-    elif plot_type == 'rfecv_gridscores':
-        input_df = pd.read_csv(infile1, sep='\t', header='infer')
+    elif plot_type == "rfecv_gridscores":
+        input_df = pd.read_csv(infile1, sep="\t", header="infer")
         scores = input_df.iloc[:, 0]
-        steps = params['plotting_selection']['steps'].strip()
+        steps = params["plotting_selection"]["steps"].strip()
         steps = safe_eval(steps)
 
         data = go.Scatter(
             x=list(range(len(scores))),
             y=scores,
             text=[str(_) for _ in steps] if steps else None,
-            mode='lines'
+            mode="lines",
         )
         layout = go.Layout(
             xaxis=dict(title="Number of features selected"),
             yaxis=dict(title="Cross validation score"),
-            title=dict(
-                text=title or None,
-                x=0.5,
-                y=0.92,
-                xanchor='center',
-                yanchor='top'
-            ),
-            font=dict(
-                family="sans-serif",
-                size=11
-            ),
+            title=dict(text=title or None, x=0.5, y=0.92, xanchor="center", yanchor="top"),
+            font=dict(family="sans-serif", size=11),
             # control background colors
-            plot_bgcolor='rgba(255,255,255,0)'
+            plot_bgcolor="rgba(255,255,255,0)",
         )
         """
         # legend=dict(
@@ -489,55 +477,43 @@
         """
 
         fig = go.Figure(data=[data], layout=layout)
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
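
Each HTML-producing branch follows the same plotly offline pattern; a self-contained sketch of just that pattern, on toy data:

    import plotly
    import plotly.graph_objs as go

    trace = go.Scatter(x=[1, 2, 3], y=[0.70, 0.80, 0.85], mode="lines")
    fig = go.Figure(data=[trace], layout=go.Layout(title="demo"))
    # writes output.html in the working directory without opening a browser
    plotly.offline.plot(fig, filename="output.html", auto_open=False)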
 
-    elif plot_type == 'learning_curve':
-        input_df = pd.read_csv(infile1, sep='\t', header='infer')
-        plot_std_err = params['plotting_selection']['plot_std_err']
+    elif plot_type == "learning_curve":
+        input_df = pd.read_csv(infile1, sep="\t", header="infer")
+        plot_std_err = params["plotting_selection"]["plot_std_err"]
         data1 = go.Scatter(
-            x=input_df['train_sizes_abs'],
-            y=input_df['mean_train_scores'],
-            error_y=dict(
-                array=input_df['std_train_scores']
-            ) if plot_std_err else None,
-            mode='lines',
+            x=input_df["train_sizes_abs"],
+            y=input_df["mean_train_scores"],
+            error_y=dict(array=input_df["std_train_scores"]) if plot_std_err else None,
+            mode="lines",
             name="Train Scores",
         )
         data2 = go.Scatter(
-            x=input_df['train_sizes_abs'],
-            y=input_df['mean_test_scores'],
-            error_y=dict(
-                array=input_df['std_test_scores']
-            ) if plot_std_err else None,
-            mode='lines',
+            x=input_df["train_sizes_abs"],
+            y=input_df["mean_test_scores"],
+            error_y=dict(array=input_df["std_test_scores"]) if plot_std_err else None,
+            mode="lines",
             name="Test Scores",
         )
         layout = dict(
-            xaxis=dict(
-                title='No. of samples'
-            ),
-            yaxis=dict(
-                title='Performance Score'
-            ),
+            xaxis=dict(title="No. of samples"),
+            yaxis=dict(title="Performance Score"),
             # modify these configurations to customize image
             title=dict(
-                text=title or 'Learning Curve',
+                text=title or "Learning Curve",
                 x=0.5,
                 y=0.92,
-                xanchor='center',
-                yanchor='top'
+                xanchor="center",
+                yanchor="top",
             ),
-            font=dict(
-                family="sans-serif",
-                size=11
-            ),
+            font=dict(family="sans-serif", size=11),
             # control background colors
-            plot_bgcolor='rgba(255,255,255,0)'
+            plot_bgcolor="rgba(255,255,255,0)",
         )
         """
         # legend=dict(
@@ -556,27 +532,26 @@
         """
 
         fig = go.Figure(data=[data1, data2], layout=layout)
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
 
-    elif plot_type == 'keras_plot_model':
-        with open(model_config, 'r') as f:
+    elif plot_type == "keras_plot_model":
+        with open(model_config, "r") as f:
             model_str = f.read()
         model = model_from_json(model_str)
         plot_model(model, to_file="output.png")
-        os.rename('output.png', 'output')
+        os.rename("output.png", "output")
 
         return 0
 
-    elif plot_type == 'classification_confusion_matrix':
+    elif plot_type == "classification_confusion_matrix":
         plot_selection = params["plotting_selection"]
         input_true = get_dataframe(true_labels, plot_selection, "header_true", "column_selector_options_true")
-        header_predicted = 'infer' if plot_selection["header_predicted"] else None
-        input_predicted = pd.read_csv(predicted_labels, sep='\t', parse_dates=True, header=header_predicted)
+        header_predicted = "infer" if plot_selection["header_predicted"] else None
+        input_predicted = pd.read_csv(predicted_labels, sep="\t", parse_dates=True, header=header_predicted)
         true_classes = input_true.iloc[:, -1].copy()
         predicted_classes = input_predicted.iloc[:, -1].copy()
         axis_labels = list(set(true_classes))
@@ -586,15 +561,15 @@
         for i in range(len(c_matrix)):
             for j in range(len(c_matrix)):
                 ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
-        ax.set_ylabel('True class labels')
-        ax.set_xlabel('Predicted class labels')
+        ax.set_ylabel("True class labels")
+        ax.set_xlabel("Predicted class labels")
         ax.set_title(title)
         ax.set_xticks(axis_labels)
         ax.set_yticks(axis_labels)
         fig.colorbar(im, ax=ax)
         fig.tight_layout()
         plt.savefig("output.png", dpi=125)
-        os.rename('output.png', 'output')
+        os.rename("output.png", "output")
 
         return 0
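
The heatmap above annotates a precomputed c_matrix; a compact sketch of how such a matrix is typically produced and drawn (using sklearn's confusion_matrix is an assumption here, since the line computing c_matrix falls outside this hunk):

    import matplotlib.pyplot as plt
    from sklearn.metrics import confusion_matrix

    y_true = [0, 1, 1, 0, 1]
    y_pred = [0, 1, 0, 0, 1]
    c_matrix = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots()
    im = ax.imshow(c_matrix)
    # annotate each cell with its count, as the branch above does
    for i in range(len(c_matrix)):
        for j in range(len(c_matrix)):
            ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
    fig.colorbar(im, ax=ax)
    plt.savefig("output.png", dpi=125)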
 
@@ -603,7 +578,7 @@
     # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -623,11 +598,21 @@
     aparser.add_argument("-pt", "--title", dest="title")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         groups=args.groups, ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path,
-         model_config=args.model_config, true_labels=args.true_labels,
-         predicted_labels=args.predicted_labels,
-         plot_color=args.plot_color,
-         title=args.title)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+        model_config=args.model_config,
+        true_labels=args.true_labels,
+        predicted_labels=args.predicted_labels,
+        plot_color=args.plot_color,
+        title=args.title,
+    )
diff -r 7068b5fcd623 -r 1e99cfb71f40 model_prediction.py
--- a/model_prediction.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/model_prediction.py	Tue Apr 13 17:52:15 2021 +0000
@@ -1,23 +1,29 @@
 import argparse
 import json
+import warnings
+
 import numpy as np
 import pandas as pd
-import warnings
-
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 
-from galaxy_ml.utils import (load_model, read_columns,
-                             get_module, try_get_attr)
+from galaxy_ml.utils import (get_module, load_model,
+                             read_columns, try_get_attr)
+
+
+N_JOBS = int(__import__("os").environ.get("GALAXY_SLOTS", 1))
 
 
-N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
-
-
-def main(inputs, infile_estimator, outfile_predict,
-         infile_weights=None, infile1=None,
-         fasta_path=None, ref_seq=None,
-         vcf_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    outfile_predict,
+    infile_weights=None,
+    infile1=None,
+    fasta_path=None,
+    ref_seq=None,
+    vcf_path=None,
+):
     """
     Parameters
     ----------
@@ -45,96 +51,94 @@
     vcf_path : str
         File path to dataset containing variants info.
     """
-    warnings.filterwarnings('ignore')
+    warnings.filterwarnings("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     # load model
-    with open(infile_estimator, 'rb') as est_handler:
+    with open(infile_estimator, "rb") as est_handler:
         estimator = load_model(est_handler)
 
     main_est = estimator
     if isinstance(estimator, Pipeline):
         main_est = estimator.steps[-1][-1]
-    if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'):
-        if not infile_weights or infile_weights == 'None':
-            raise ValueError("The selected model skeleton asks for weights, "
-                             "but dataset for weights wan not selected!")
+    if hasattr(main_est, "config") and hasattr(main_est, "load_weights"):
+        if not infile_weights or infile_weights == "None":
+            raise ValueError(
+                "The selected model skeleton asks for weights, " "but dataset for weights wan not selected!"
+            )
         main_est.load_weights(infile_weights)
 
     # handle data input
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']
-                               ['column_selector_options_1']
-                               ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
-        df = pd.read_csv(infile1, sep='\t', header=header, parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
 
-        if params['method'] == 'predict':
+        if params["method"] == "predict":
             preds = estimator.predict(X)
         else:
             preds = estimator.predict_proba(X)
 
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
-        if params['method'] == 'predict':
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
+        if params["method"] == "predict":
             preds = estimator.predict(X)
         else:
             preds = estimator.predict_proba(X)
 
     # fasta input
-    elif input_type == 'seq_fasta':
-        if not hasattr(estimator, 'data_batch_generator'):
+    elif input_type == "seq_fasta":
+        if not hasattr(estimator, "data_batch_generator"):
             raise ValueError(
                 "To do prediction on sequences in fasta input, "
                 "the estimator must be a `KerasGBatchClassifier`"
-                "equipped with data_batch_generator!")
-        pyfaidx = get_module('pyfaidx')
+                "equipped with data_batch_generator!"
+            )
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         seq_length = estimator.data_batch_generator.seq_length
-        batch_size = getattr(estimator, 'batch_size', 32)
+        batch_size = getattr(estimator, "batch_size", 32)
         steps = (n_seqs + batch_size - 1) // batch_size
 
-        seq_type = params['input_options']['seq_type']
-        klass = try_get_attr(
-            'galaxy_ml.preprocessors', seq_type)
+        seq_type = params["input_options"]["seq_type"]
+        klass = try_get_attr("galaxy_ml.preprocessors", seq_type)
 
-        pred_data_generator = klass(
-            fasta_path, seq_length=seq_length)
+        pred_data_generator = klass(fasta_path, seq_length=seq_length)
 
-        if params['method'] == 'predict':
-            preds = estimator.predict(
-                X, data_generator=pred_data_generator, steps=steps)
+        if params["method"] == "predict":
+            preds = estimator.predict(X, data_generator=pred_data_generator, steps=steps)
         else:
-            preds = estimator.predict_proba(
-                X, data_generator=pred_data_generator, steps=steps)
+            preds = estimator.predict_proba(X, data_generator=pred_data_generator, steps=steps)
 
     # vcf input
-    elif input_type == 'variant_effect':
-        klass = try_get_attr('galaxy_ml.preprocessors',
-                             'GenomicVariantBatchGenerator')
+    elif input_type == "variant_effect":
+        klass = try_get_attr("galaxy_ml.preprocessors", "GenomicVariantBatchGenerator")
 
-        options = params['input_options']
-        options.pop('selected_input')
-        if options['blacklist_regions'] == 'none':
-            options['blacklist_regions'] = None
+        options = params["input_options"]
+        options.pop("selected_input")
+        if options["blacklist_regions"] == "none":
+            options["blacklist_regions"] = None
 
-        pred_data_generator = klass(
-            ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
+        pred_data_generator = klass(ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
 
         pred_data_generator.set_processing_attrs()
 
@@ -143,9 +147,8 @@
         # predict 1600 samples at a time, then write to file
         gen_flow = pred_data_generator.flow(batch_size=1600)
 
-        file_writer = open(outfile_predict, 'w')
-        header_row = '\t'.join(['chrom', 'pos', 'name', 'ref',
-                                'alt', 'strand'])
+        file_writer = open(outfile_predict, "w")
+        header_row = "\t".join(["chrom", "pos", "name", "ref", "alt", "strand"])
         file_writer.write(header_row)
         header_done = False
 
@@ -155,23 +158,24 @@
         try:
             while steps_done < len(gen_flow):
                 index_array = next(gen_flow.index_generator)
-                batch_X = gen_flow._get_batches_of_transformed_samples(
-                    index_array)
+                batch_X = gen_flow._get_batches_of_transformed_samples(index_array)
 
-                if params['method'] == 'predict':
+                if params["method"] == "predict":
                     batch_preds = estimator.predict(
                         batch_X,
                         # `pred_data_generator` below overrides any
                         # data_generator the model itself may carry.
-                        data_generator=pred_data_generator)
+                        data_generator=pred_data_generator,
+                    )
                 else:
                     batch_preds = estimator.predict_proba(
                         batch_X,
                         # `pred_data_generator` below overrides any
                         # data_generator the model itself may carry.
-                        data_generator=pred_data_generator)
+                        data_generator=pred_data_generator,
+                    )
 
                 if batch_preds.ndim == 1:
                     batch_preds = batch_preds[:, np.newaxis]
@@ -181,12 +185,12 @@
 
                 if not header_done:
                     heads = np.arange(batch_preds.shape[-1]).astype(str)
-                    heads_str = '\t'.join(heads)
+                    heads_str = "\t".join(heads)
                     file_writer.write("\t%s\n" % heads_str)
                     header_done = True
 
                 for row in batch_out:
-                    row_str = '\t'.join(row)
+                    row_str = "\t".join(row)
                     file_writer.write("%s\n" % row_str)
 
                 steps_done += 1
@@ -200,14 +204,14 @@
 
     # output
     if len(preds.shape) == 1:
-        rval = pd.DataFrame(preds, columns=['Predicted'])
+        rval = pd.DataFrame(preds, columns=["Predicted"])
     else:
         rval = pd.DataFrame(preds)
 
-    rval.to_csv(outfile_predict, sep='\t', header=True, index=False)
+    rval.to_csv(outfile_predict, sep="\t", header=True, index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator")
@@ -219,7 +223,13 @@
     aparser.add_argument("-v", "--vcf_path", dest="vcf_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.outfile_predict,
-         infile_weights=args.infile_weights, infile1=args.infile1,
-         fasta_path=args.fasta_path, ref_seq=args.ref_seq,
-         vcf_path=args.vcf_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.outfile_predict,
+        infile_weights=args.infile_weights,
+        infile1=args.infile1,
+        fasta_path=args.fasta_path,
+        ref_seq=args.ref_seq,
+        vcf_path=args.vcf_path,
+    )
diff -r 7068b5fcd623 -r 1e99cfb71f40 pca.py
--- a/pca.py	Thu Oct 01 20:27:36 2020 +0000
+++ b/pca.py	Tue Apr 13 17:52:15 2021 +0000
@@ -1,98 +1,185 @@
 import argparse
+
 import numpy as np
-from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA
 from galaxy_ml.utils import read_columns
+from sklearn.decomposition import IncrementalPCA, KernelPCA, PCA
+
 
 def main():
-    parser = argparse.ArgumentParser(description='RDKit screen')
-    parser.add_argument('-i', '--infile',
-                        help="Input file")
-    parser.add_argument('--header', action='store_true', help="Include the header row or skip it")
-    parser.add_argument('-c', '--columns', type=str.lower, default='all', choices=['by_index_number', 'all_but_by_index_number',\
-                        'by_header_name', 'all_but_by_header_name', 'all_columns'],
-                        help="Choose to select all columns, or exclude/include some")
-    parser.add_argument('-ci', '--column_indices', type=str.lower,
-                        help="Choose to select all columns, or exclude/include some")
-    parser.add_argument('-n', '--number', nargs='?', type=int, default=None,\
-                        help="Number of components to keep. If not set, all components are kept")
-    parser.add_argument('--whiten', action='store_true', help="Whiten the components")
-    parser.add_argument('-t', '--pca_type', type=str.lower, default='classical', choices=['classical', 'incremental', 'kernel'],
-                        help="Choose which flavour of PCA to use")
-    parser.add_argument('-s', '--svd_solver', type=str.lower, default='auto', choices=['auto', 'full', 'arpack', 'randomized'],
-                        help="Choose the type of svd solver.")
-    parser.add_argument('-b', '--batch_size', nargs='?', type=int, default=None,\
-                        help="The number of samples to use for each batch")
-    parser.add_argument('-k', '--kernel', type=str.lower, default='linear',\
-                        choices=['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'],
-                        help="Choose the type of kernel.")
-    parser.add_argument('-g', '--gamma', nargs='?', type=float, default=None,
-                        help='Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels')
-    parser.add_argument('-tol', '--tolerance', type=float, default=0.0,
-                        help='Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack')
-    parser.add_argument('-mi', '--max_iter', nargs='?', type=int, default=None,\
-                        help="Maximum number of iterations for arpack")
-    parser.add_argument('-d', '--degree', type=int, default=3,\
-                        help="Degree for poly kernels. Ignored by other kernels")
-    parser.add_argument('-cf', '--coef0', type=float, default=1.0,
-                        help='Independent term in poly and sigmoid kernels')
-    parser.add_argument('-e', '--eigen_solver', type=str.lower, default='auto', choices=['auto', 'dense', 'arpack'],
-                        help="Choose the type of eigen solver.")
-    parser.add_argument('-o', '--outfile',
-                        help="Base name for output file (no extension).")
+    parser = argparse.ArgumentParser(description="RDKit screen")
+    parser.add_argument("-i", "--infile", help="Input file")
+    parser.add_argument(
+        "--header", action="store_true", help="Include the header row or skip it"
+    )
+    parser.add_argument(
+        "-c",
+        "--columns",
+        type=str.lower,
+        default="all",
+        choices=[
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+            "all_columns",
+        ],
+        help="Choose to select all columns, or exclude/include some",
+    )
+    parser.add_argument(
+        "-ci",
+        "--column_indices",
+        type=str.lower,
+        help="Comma-separated column indices or header names to include/exclude",
+    )
+    parser.add_argument(
+        "-n",
+        "--number",
+        nargs="?",
+        type=int,
+        default=None,
+        help="Number of components to keep. If not set, all components are kept",
+    )
+    parser.add_argument("--whiten", action="store_true", help="Whiten the components")
+    parser.add_argument(
+        "-t",
+        "--pca_type",
+        type=str.lower,
+        default="classical",
+        choices=["classical", "incremental", "kernel"],
+        help="Choose which flavour of PCA to use",
+    )
+    parser.add_argument(
+        "-s",
+        "--svd_solver",
+        type=str.lower,
+        default="auto",
+        choices=["auto", "full", "arpack", "randomized"],
+        help="Choose the type of svd solver.",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch_size",
+        nargs="?",
+        type=int,
+        default=None,
+        help="The number of samples to use for each batch",
+    )
+    parser.add_argument(
+        "-k",
+        "--kernel",
+        type=str.lower,
+        default="linear",
+        choices=["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"],
+        help="Choose the type of kernel.",
+    )
+    parser.add_argument(
+        "-g",
+        "--gamma",
+        nargs="?",
+        type=float,
+        default=None,
+        help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels",
+    )
+    parser.add_argument(
+        "-tol",
+        "--tolerance",
+        type=float,
+        default=0.0,
+        help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack",
+    )
+    parser.add_argument(
+        "-mi",
+        "--max_iter",
+        nargs="?",
+        type=int,
+        default=None,
+        help="Maximum number of iterations for arpack",
+    )
+    parser.add_argument(
+        "-d",
+        "--degree",
+        type=int,
+        default=3,
+        help="Degree for poly kernels. Ignored by other kernels",
+    )
+    parser.add_argument(
+        "-cf",
+        "--coef0",
+        type=float,
+        default=1.0,
+        help="Independent term in poly and sigmoid kernels",
+    )
+    parser.add_argument(
+        "-e",
+        "--eigen_solver",
+        type=str.lower,
+        default="auto",
+        choices=["auto", "dense", "arpack"],
+        help="Choose the type of eigen solver.",
+    )
+    parser.add_argument(
+        "-o", "--outfile", help="Base name for output file (no extension)."
+    )
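+    # Hypothetical example invocation (script and file names are illustrative only):
+    #   python pca.py -i data.tabular --header -t kernel -k rbf -n 2 -o pca_output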
     args = parser.parse_args()
 
     usecols = None
-    cols = []
     pca_params = {}
 
-    if args.columns == 'by_index_number' or args.columns == 'all_but_by_index_number':
-        usecols = [int(i) for i in args.column_indices.split(',')]
-    elif args.columns == 'by_header_name' or args.columns == 'all_but_by_header_name':
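+    # Resolve the column selector: integer indices for the *_by_index_number
+    # options, a comma-separated header-name string for *_by_header_name.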
+    if args.columns == "by_index_number" or args.columns == "all_but_by_index_number":
+        usecols = [int(i) for i in args.column_indices.split(",")]
+    elif args.columns == "by_header_name" or args.columns == "all_but_by_header_name":
         usecols = args.column_indices
 
-    header = 'infer' if args.header else None
+    header = "infer" if args.header else None
 
     pca_input = read_columns(
         f=args.infile,
         c=usecols,
         c_option=args.columns,
-        sep='\t',
+        sep="\t",
         header=header,
         parse_dates=True,
         encoding=None,
-        index_col=None)
+        index_col=None,
+    )
 
-    pca_params.update({'n_components': args.number})
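+    # n_components=None (the default) keeps all components.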
+    pca_params.update({"n_components": args.number})
 
-    if args.pca_type == 'classical':
-        pca_params.update({'svd_solver': args.svd_solver, 'whiten': args.whiten})
-        if args.svd_solver == 'arpack':
-            pca_params.update({'tol': args.tolerance})
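+    # Classical PCA via SVD; 'tol' only applies when the arpack solver is used.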
+    if args.pca_type == "classical":
+        pca_params.update({"svd_solver": args.svd_solver, "whiten": args.whiten})
+        if args.svd_solver == "arpack":
+            pca_params.update({"tol": args.tolerance})
         pca = PCA()
 
-    elif args.pca_type == 'incremental':
-        pca_params.update({'batch_size': args.batch_size, 'whiten': args.whiten})
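+    # IncrementalPCA fits in mini-batches, so large inputs need not fit in memory at once.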
+    elif args.pca_type == "incremental":
+        pca_params.update({"batch_size": args.batch_size, "whiten": args.whiten})
         pca = IncrementalPCA()
 
-    elif args.pca_type == 'kernel':
-        pca_params.update({'kernel': args.kernel, 'eigen_solver': args.eigen_solver, 'gamma': args.gamma})
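+    # KernelPCA performs non-linear dimensionality reduction via the kernel trick.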
+    elif args.pca_type == "kernel":
+        pca_params.update(
+            {
+                "kernel": args.kernel,
+                "eigen_solver": args.eigen_solver,
+                "gamma": args.gamma,
+            }
+        )
 
-        if args.kernel == 'poly':
-            pca_params.update({'degree': args.degree, 'coef0': args.coef0})
-        elif args.kernel == 'sigmoid':
-            pca_params.update({'coef0': args.coef0})
-        elif args.kernel == 'precomputed':
+        if args.kernel == "poly":
+            pca_params.update({"degree": args.degree, "coef0": args.coef0})
+        elif args.kernel == "sigmoid":
+            pca_params.update({"coef0": args.coef0})
+        elif args.kernel == "precomputed":
             pca_input = np.dot(pca_input, pca_input.T)
 
-        if args.eigen_solver == 'arpack':
-            pca_params.update({'tol': args.tolerance, 'max_iter': args.max_iter})
+        if args.eigen_solver == "arpack":
+            pca_params.update({"tol": args.tolerance, "max_iter": args.max_iter})
 
         pca = KernelPCA()
 
     print(pca_params)
     pca.set_params(**pca_params)
     pca_output = pca.fit_transform(pca_input)
-    np.savetxt(fname=args.outfile, X=pca_output, fmt='%.4f', delimiter='\t')
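+    # Write the transformed coordinates as a tab-separated table (4 decimal places).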
+    np.savetxt(fname=args.outfile, X=pca_output, fmt="%.4f", delimiter="\t")
 
 
 if __name__ == "__main__":
diff -r 7068b5fcd623 -r 1e99cfb71f40 sample_generator.xml
--- a/sample_generator.xml	Thu Oct 01 20:27:36 2020 +0000
+++ b/sample_generator.xml	Tue Apr 13 17:52:15 2021 +0000
@@ -1,10 +1,10 @@
[The remainder of this hunk was garbled in extraction: the XML markup of sample_generator.xml was stripped, leaving only bare +/- markers and orphaned text fragments. Recoverable content: the tool description "random samples with controlled size and complexity", the macro import main_macros.xml, and the version command echo "@VERSION@". The tag-level changes cannot be reconstructed.]