# HG changeset patch
# User bgruening
# Date 1618334941 0
# Node ID 0a3f113397b2e75ccba3fe052ec6e85cc42bebee
# Parent  508ce0649bec2a600c43c93c297d4ede5627a0e5
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
diff -r 508ce0649bec -r 0a3f113397b2 fitted_model_eval.py
--- a/fitted_model_eval.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/fitted_model_eval.py	Tue Apr 13 17:29:01 2021 +0000
@@ -11,7 +11,7 @@
 
 
 def _get_X_y(params, infile1, infile2):
-    """ read from inputs and output X and y
+    """read from inputs and output X and y
 
     Parameters
     ----------
@@ -26,35 +26,40 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -62,26 +67,24 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
 
     return X, y
 
 
-def main(inputs, infile_estimator, outfile_eval,
-         infile_weights=None, infile1=None,
-         infile2=None):
+def main(
+    inputs,
+    infile_estimator,
+    outfile_eval,
+    infile_weights=None,
+    infile1=None,
+    infile2=None,
+):
     """
     Parameter
     ---------
@@ -103,49 +106,55 @@
     infile2 : str
         File path to dataset containing target values
     """
-    warnings.filterwarnings('ignore')
+    warnings.filterwarnings("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     X_test, y_test = _get_X_y(params, infile1, infile2)
 
     # load model
-    with open(infile_estimator, 'rb') as est_handler:
+    with open(infile_estimator, "rb") as est_handler:
         estimator = load_model(est_handler)
 
     main_est = estimator
     if isinstance(estimator, Pipeline):
         main_est = estimator.steps[-1][-1]
-    if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'):
-        if not infile_weights or infile_weights == 'None':
-            raise ValueError("The selected model skeleton asks for weights, "
-                             "but no dataset for weights was provided!")
+    if hasattr(main_est, "config") and hasattr(main_est, "load_weights"):
+        if not infile_weights or infile_weights == "None":
+            raise ValueError(
+                "The selected model skeleton asks for weights, " "but no dataset for weights was provided!"
+            )
         main_est.load_weights(infile_weights)
 
     # handle scorer, convert to scorer dict
-    scoring = params['scoring']
+    # Check if scoring is specified
+    scoring = params["scoring"]
+    if scoring is not None:
+        # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
+        # Check if secondary_scoring is specified
+        secondary_scoring = scoring.get("secondary_scoring", None)
+        if secondary_scoring is not None:
+            # If secondary_scoring is specified, convert the list into a comma-separated string
+            scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
+
     scorer = get_scoring(scoring)
     scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
 
-    if hasattr(estimator, 'evaluate'):
-        scores = estimator.evaluate(X_test, y_test=y_test,
-                                    scorer=scorer,
-                                    is_multimetric=True)
+    if hasattr(estimator, "evaluate"):
+        scores = estimator.evaluate(X_test, y_test=y_test, scorer=scorer, is_multimetric=True)
     else:
-        scores = _score(estimator, X_test, y_test, scorer,
-                        is_multimetric=True)
+        scores = _score(estimator, X_test, y_test, scorer, is_multimetric=True)
 
     # handle output
     for name, score in scores.items():
         scores[name] = [score]
     df = pd.DataFrame(scores)
     df = df[sorted(df.columns)]
-    df.to_csv(path_or_buf=outfile_eval, sep='\t',
-              header=True, index=False)
+    df.to_csv(path_or_buf=outfile_eval, sep="\t", header=True, index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator")
@@ -155,6 +164,11 @@
     aparser.add_argument("-O", "--outfile_eval", dest="outfile_eval")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.outfile_eval,
-         infile_weights=args.infile_weights, infile1=args.infile1,
-         infile2=args.infile2)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.outfile_eval,
+        infile_weights=args.infile_weights,
+        infile1=args.infile1,
+        infile2=args.infile2,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 keras_batch_models.xml
--- a/keras_batch_models.xml	Thu Oct 01 20:02:43 2020 +0000
+++ b/keras_batch_models.xml	Tue Apr 13 17:29:01 2021 +0000
@@ -1,14 +1,14 @@
-
-  with online data generator for Genomic/Protein sequences and images
-  
-    main_macros.xml
-    keras_macros.xml
-  
-  
-  
-  echo "@KERAS_VERSION@"
-  
-    
+    with online data generator for Genomic/Protein sequences and images
+    
+        main_macros.xml
+        keras_macros.xml
+    
+    
+    
+    echo "@KERAS_VERSION@"
+    
+        
-  
-  
-    
-  
-  
-    
-      
-        
-      
-      
-        
-        
-          
-        
-        
-          
-            
-            
-            
-            
-            
-          
-          
-            
-          
-          
-            
-          
-          
-            
-          
-          
-            
-          
-          
+    
+    
+        
+    
+    
+        
+            
+                
+            
+            
+                
+                
+                    
+                
+                
+                    
+                        
+                        
+                        
+                        
+                        
+                    
+                    
+                        
+                    
+                    
+                        
+                    
+                    
+                        
+                    
+                    
+                        
+                    
+                    
+                
+                
+                
+                
+                
+                
+            
         
-        
-        
-        
-        
-        
-      
-    
-    
-  
-  
-    
-    
-      get_params
-    
-  
-  
-    
-      
-        
-        
-        
-          
-        
-        
-          
-          
-            
-              
+        
+    
+    
+        
+        
+            get_params
+        
+    
+    
+        
+            
+                
+                
+                
+                    
+                
+                
             
-          
-        
-      
-      
-      
-      
-    
-    
-      
-        
-        
-        
-          
-          
-        
-        
-      
-      
-    
-    
-      
-        
-        
-        
-          
-          
-        
-        
+        
+            
+                
+                
+                
+                    
+                    
+                
+                
             
-          
-        
-      
-      
-    
-    
-      
-        
-        
-        
-          
-          
-        
-        
+        
+            
+                
+                
+                
+                    
+                    
+                
+                
             
-          
-        
-      
-      
-      
-      
-    
-  
-  
-      
+        
+        
+            
+                
+                
+                
+                    
+                    
+                
+                
+            
+            
+            
+            
+        
+    
+    
+        
-  
-  
-    
-    
-  
+    
+    
+        
+        
+    
 
diff -r 508ce0649bec -r 0a3f113397b2 keras_deep_learning.py
--- a/keras_deep_learning.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/keras_deep_learning.py	Tue Apr 13 17:29:01 2021 +0000
@@ -177,11 +177,11 @@
         # merge layers
         if 'merging_layers' in options:
             idxs = literal_eval(options.pop('merging_layers'))
-            merging_layers = [all_layers[i-1] for i in idxs]
+            merging_layers = [all_layers[i - 1] for i in idxs]
             new_layer = klass(**options)(merging_layers)
         # non-input layers
         elif inbound_nodes is not None:
-            new_layer = klass(**options)(all_layers[inbound_nodes-1])
+            new_layer = klass(**options)(all_layers[inbound_nodes - 1])
         # input layers
         else:
             new_layer = klass(**options)
@@ -189,10 +189,10 @@
         all_layers.append(new_layer)
 
     input_indexes = _handle_shape(config['input_layers'])
-    input_layers = [all_layers[i-1] for i in input_indexes]
+    input_layers = [all_layers[i - 1] for i in input_indexes]
 
     output_indexes = _handle_shape(config['output_layers'])
-    output_layers = [all_layers[i-1] for i in output_indexes]
+    output_layers = [all_layers[i - 1] for i in output_indexes]
 
     return Model(inputs=input_layers, outputs=output_layers)
 
@@ -300,8 +300,7 @@
         options.update((inputs['mode_selection']['compile_params']
                         ['optimizer_selection']['optimizer_options']))
 
-        train_metrics = (inputs['mode_selection']['compile_params']
-                         ['metrics']).split(',')
+        train_metrics = inputs['mode_selection']['compile_params']['metrics']
         if train_metrics[-1] == 'none':
             train_metrics = train_metrics[:-1]
         options['metrics'] = train_metrics
diff -r 508ce0649bec -r 0a3f113397b2 keras_train_and_eval.py
--- a/keras_train_and_eval.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/keras_train_and_eval.py	Tue Apr 13 17:29:01 2021 +0000
@@ -10,7 +10,6 @@
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 from sklearn.metrics.scorer import _check_multimetric_scoring
-from sklearn import model_selection
 from sklearn.model_selection._validation import _score
 from sklearn.model_selection import _search, _validation
 from sklearn.utils import indexable, safe_indexing
@@ -18,39 +17,49 @@
 from galaxy_ml.externals.selene_sdk.utils import compute_score
 from galaxy_ml.model_validations import train_test_split
 from galaxy_ml.keras_galaxy_models import _predict_generator
-from galaxy_ml.utils import (SafeEval, get_scoring, load_model,
-                             read_columns, try_get_attr, get_module,
-                             clean_params, get_main_estimator)
+from galaxy_ml.utils import (
+    SafeEval,
+    get_scoring,
+    load_model,
+    read_columns,
+    try_get_attr,
+    get_module,
+    clean_params,
+    get_main_estimator,
+)
 
 
-_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
-setattr(_search, '_fit_and_score', _fit_and_score)
-setattr(_validation, '_fit_and_score', _fit_and_score)
+_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
+setattr(_search, "_fit_and_score", _fit_and_score)
+setattr(_validation, "_fit_and_score", _fit_and_score)
 
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
-CACHE_DIR = os.path.join(os.getcwd(), 'cached')
+N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
+CACHE_DIR = os.path.join(os.getcwd(), "cached")
 del os
-NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
-                  'nthread', 'callbacks')
-ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
-                     'CSVLogger', 'None')
+NON_SEARCHABLE = ("n_jobs", "pre_dispatch", "memory", "_path", "nthread", "callbacks")
+ALLOWED_CALLBACKS = (
+    "EarlyStopping",
+    "TerminateOnNaN",
+    "ReduceLROnPlateau",
+    "CSVLogger",
+    "None",
+)
 
 
 def _eval_swap_params(params_builder):
     swap_params = {}
 
-    for p in params_builder['param_set']:
-        swap_value = p['sp_value'].strip()
-        if swap_value == '':
+    for p in params_builder["param_set"]:
+        swap_value = p["sp_value"].strip()
+        if swap_value == "":
             continue
 
-        param_name = p['sp_name']
+        param_name = p["sp_name"]
         if param_name.lower().endswith(NON_SEARCHABLE):
-            warnings.warn("Warning: `%s` is not eligible for search and was "
-                          "omitted!" % param_name)
+            warnings.warn("Warning: `%s` is not eligible for search and was " "omitted!" % param_name)
             continue
 
-        if not swap_value.startswith(':'):
+        if not swap_value.startswith(":"):
             safe_eval = SafeEval(load_scipy=True, load_numpy=True)
             ev = safe_eval(swap_value)
         else:
@@ -77,34 +86,31 @@
         else:
             new_arrays.append(arr)
 
-    if kwargs['shuffle'] == 'None':
-        kwargs['shuffle'] = None
+    if kwargs["shuffle"] == "None":
+        kwargs["shuffle"] = None
 
-    group_names = kwargs.pop('group_names', None)
+    group_names = kwargs.pop("group_names", None)
 
     if group_names is not None and group_names.strip():
-        group_names = [name.strip() for name in
-                       group_names.split(',')]
+        group_names = [name.strip() for name in group_names.split(",")]
         new_arrays = indexable(*new_arrays)
-        groups = kwargs['labels']
+        groups = kwargs["labels"]
         n_samples = new_arrays[0].shape[0]
         index_arr = np.arange(n_samples)
         test = index_arr[np.isin(groups, group_names)]
         train = index_arr[~np.isin(groups, group_names)]
-        rval = list(chain.from_iterable(
-            (safe_indexing(a, train),
-             safe_indexing(a, test)) for a in new_arrays))
+        rval = list(chain.from_iterable((safe_indexing(a, train), safe_indexing(a, test)) for a in new_arrays))
     else:
         rval = train_test_split(*new_arrays, **kwargs)
 
     for pos in nones:
-        rval[pos * 2: 2] = [None, None]
+        rval[pos * 2 : 2] = [None, None]
 
     return rval
 
 
 def _evaluate(y_true, pred_probas, scorer, is_multimetric=True):
-    """ output scores based on input scorer
+    """output scores based on input scorer
 
     Parameters
     ----------
@@ -118,52 +124,55 @@
     """
     if y_true.ndim == 1 or y_true.shape[-1] == 1:
         pred_probas = pred_probas.ravel()
-        pred_labels = (pred_probas > 0.5).astype('int32')
-        targets = y_true.ravel().astype('int32')
+        pred_labels = (pred_probas > 0.5).astype("int32")
+        targets = y_true.ravel().astype("int32")
         if not is_multimetric:
-            preds = pred_labels if scorer.__class__.__name__ == \
-                '_PredictScorer' else pred_probas
+            preds = pred_labels if scorer.__class__.__name__ == "_PredictScorer" else pred_probas
             score = scorer._score_func(targets, preds, **scorer._kwargs)
 
             return score
         else:
             scores = {}
             for name, one_scorer in scorer.items():
-                preds = pred_labels if one_scorer.__class__.__name__\
-                    == '_PredictScorer' else pred_probas
-                score = one_scorer._score_func(targets, preds,
-                                               **one_scorer._kwargs)
+                preds = pred_labels if one_scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+                score = one_scorer._score_func(targets, preds, **one_scorer._kwargs)
                 scores[name] = score
 
     # TODO: multi-class metrics
     # multi-label
     else:
-        pred_labels = (pred_probas > 0.5).astype('int32')
-        targets = y_true.astype('int32')
+        pred_labels = (pred_probas > 0.5).astype("int32")
+        targets = y_true.astype("int32")
         if not is_multimetric:
-            preds = pred_labels if scorer.__class__.__name__ == \
-                '_PredictScorer' else pred_probas
-            score, _ = compute_score(preds, targets,
-                                     scorer._score_func)
+            preds = pred_labels if scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+            score, _ = compute_score(preds, targets, scorer._score_func)
             return score
         else:
             scores = {}
             for name, one_scorer in scorer.items():
-                preds = pred_labels if one_scorer.__class__.__name__\
-                    == '_PredictScorer' else pred_probas
-                score, _ = compute_score(preds, targets,
-                                         one_scorer._score_func)
+                preds = pred_labels if one_scorer.__class__.__name__ == "_PredictScorer" else pred_probas
+                score, _ = compute_score(preds, targets, one_scorer._score_func)
                 scores[name] = score
 
     return scores
 
 
-def main(inputs, infile_estimator, infile1, infile2,
-         outfile_result, outfile_object=None,
-         outfile_weights=None, outfile_y_true=None,
-         outfile_y_preds=None, groups=None,
-         ref_seq=None, intervals=None, targets=None,
-         fasta_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    infile1,
+    infile2,
+    outfile_result,
+    outfile_object=None,
+    outfile_weights=None,
+    outfile_y_true=None,
+    outfile_y_preds=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+):
     """
     Parameter
     ---------
@@ -209,19 +218,19 @@
     fasta_path : str
         File path to dataset containing fasta file
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     #  load estimator
-    with open(infile_estimator, 'rb') as estimator_handler:
+    with open(infile_estimator, "rb") as estimator_handler:
         estimator = load_model(estimator_handler)
 
     estimator = clean_params(estimator)
 
     # swap hyperparameter
-    swapping = params['experiment_schemes']['hyperparams_swapping']
+    swapping = params["experiment_schemes"]["hyperparams_swapping"]
     swap_params = _eval_swap_params(swapping)
     estimator.set_params(**swap_params)
 
@@ -230,38 +239,39 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # fasta_file input
-    elif input_type == 'seq_fasta':
-        pyfaidx = get_module('pyfaidx')
+    elif input_type == "seq_fasta":
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         for param in estimator_params.keys():
-            if param.endswith('fasta_path'):
-                estimator.set_params(
-                    **{param: fasta_path})
+            if param.endswith("fasta_path"):
+                estimator.set_params(**{param: fasta_path})
                 break
         else:
             raise ValueError(
@@ -270,25 +280,29 @@
                 "KerasGBatchClassifier with "
                 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
                 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
-                "in pipeline!")
+                "in pipeline!"
+            )
 
-    elif input_type == 'refseq_and_interval':
+    elif input_type == "refseq_and_interval":
         path_params = {
-            'data_batch_generator__ref_genome_path': ref_seq,
-            'data_batch_generator__intervals_path': intervals,
-            'data_batch_generator__target_path': targets
+            "data_batch_generator__ref_genome_path": ref_seq,
+            "data_batch_generator__intervals_path": intervals,
+            "data_batch_generator__target_path": targets,
         }
         estimator.set_params(**path_params)
         n_intervals = sum(1 for line in open(intervals))
         X = np.arange(n_intervals)[:, np.newaxis]
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -296,37 +310,35 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2,
+                     c=c,
+                     c_option=column_option,
+                     sep='\t',
+                     header=header,
+                     parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
-    if input_type == 'refseq_and_interval':
-        estimator.set_params(
-            data_batch_generator__features=y.ravel().tolist())
+    if input_type == "refseq_and_interval":
+        estimator.set_params(data_batch_generator__features=y.ravel().tolist())
         y = None
     # end y
 
     # load groups
     if groups:
-        groups_selector = (params['experiment_schemes']['test_split']
-                                 ['split_algos']).pop('groups_selector')
+        groups_selector = (params["experiment_schemes"]["test_split"]["split_algos"]).pop("groups_selector")
 
-        header = 'infer' if groups_selector['header_g'] else None
-        column_option = \
-            (groups_selector['column_selector_options_g']
-                            ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = groups_selector['column_selector_options_g']['col_g']
+        header = "infer" if groups_selector["header_g"] else None
+        column_option = groups_selector["column_selector_options_g"]["selected_column_selector_option_g"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = groups_selector["column_selector_options_g"]["col_g"]
         else:
             c = None
 
@@ -334,13 +346,12 @@
         if df_key in loaded_df:
             groups = loaded_df[df_key]
 
-        groups = read_columns(
-                groups,
-                c=c,
-                c_option=column_option,
-                sep='\t',
-                header=header,
-                parse_dates=True)
+        groups = read_columns(groups,
+                              c=c,
+                              c_option=column_option,
+                              sep='\t',
+                              header=header,
+                              parse_dates=True)
         groups = groups.ravel()
 
     # del loaded_df
@@ -349,86 +360,99 @@
     # cache iraps_core fits could increase search speed significantly
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
     main_est = get_main_estimator(estimator)
-    if main_est.__class__.__name__ == 'IRAPSClassifier':
+    if main_est.__class__.__name__ == "IRAPSClassifier":
         main_est.set_params(memory=memory)
 
     # handle scorer, convert to scorer dict
     scoring = params['experiment_schemes']['metrics']['scoring']
+    if scoring is not None:
+        # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
+        # Check if secondary_scoring is specified
+        secondary_scoring = scoring.get("secondary_scoring", None)
+        if secondary_scoring is not None:
+            # If secondary_scoring is specified, convert the list into a comma-separated string
+            scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
+
     scorer = get_scoring(scoring)
     scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
 
     # handle test (first) split
-    test_split_options = (params['experiment_schemes']
-                                ['test_split']['split_algos'])
+    test_split_options = params["experiment_schemes"]["test_split"]["split_algos"]
 
-    if test_split_options['shuffle'] == 'group':
-        test_split_options['labels'] = groups
-    if test_split_options['shuffle'] == 'stratified':
+    if test_split_options["shuffle"] == "group":
+        test_split_options["labels"] = groups
+    if test_split_options["shuffle"] == "stratified":
         if y is not None:
-            test_split_options['labels'] = y
+            test_split_options["labels"] = y
         else:
-            raise ValueError("Stratified shuffle split is not "
-                             "applicable on empty target values!")
+            raise ValueError("Stratified shuffle split is not " "applicable on empty target values!")
 
-    X_train, X_test, y_train, y_test, groups_train, groups_test = \
-        train_test_split_none(X, y, groups, **test_split_options)
+    (
+        X_train,
+        X_test,
+        y_train,
+        y_test,
+        groups_train,
+        _groups_test,
+    ) = train_test_split_none(X, y, groups, **test_split_options)
 
-    exp_scheme = params['experiment_schemes']['selected_exp_scheme']
+    exp_scheme = params["experiment_schemes"]["selected_exp_scheme"]
 
     # handle validation (second) split
-    if exp_scheme == 'train_val_test':
-        val_split_options = (params['experiment_schemes']
-                                   ['val_split']['split_algos'])
+    if exp_scheme == "train_val_test":
+        val_split_options = params["experiment_schemes"]["val_split"]["split_algos"]
 
-        if val_split_options['shuffle'] == 'group':
-            val_split_options['labels'] = groups_train
-        if val_split_options['shuffle'] == 'stratified':
+        if val_split_options["shuffle"] == "group":
+            val_split_options["labels"] = groups_train
+        if val_split_options["shuffle"] == "stratified":
             if y_train is not None:
-                val_split_options['labels'] = y_train
+                val_split_options["labels"] = y_train
             else:
-                raise ValueError("Stratified shuffle split is not "
-                                 "applicable on empty target values!")
+                raise ValueError("Stratified shuffle split is not " "applicable on empty target values!")
 
-        X_train, X_val, y_train, y_val, groups_train, groups_val = \
-            train_test_split_none(X_train, y_train, groups_train,
-                                  **val_split_options)
+        (
+            X_train,
+            X_val,
+            y_train,
+            y_val,
+            groups_train,
+            _groups_val,
+        ) = train_test_split_none(X_train, y_train, groups_train, **val_split_options)
 
     # train and eval
-    if hasattr(estimator, 'validation_data'):
-        if exp_scheme == 'train_val_test':
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_val, y_val))
+    if hasattr(estimator, "validation_data"):
+        if exp_scheme == "train_val_test":
+            estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
         else:
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_test, y_test))
+            estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
     else:
         estimator.fit(X_train, y_train)
 
-    if hasattr(estimator, 'evaluate'):
+    if hasattr(estimator, "evaluate"):
         steps = estimator.prediction_steps
         batch_size = estimator.batch_size
-        generator = estimator.data_generator_.flow(X_test, y=y_test,
-                                                   batch_size=batch_size)
-        predictions, y_true = _predict_generator(estimator.model_, generator,
-                                                 steps=steps)
+        generator = estimator.data_generator_.flow(X_test, y=y_test, batch_size=batch_size)
+        predictions, y_true = _predict_generator(estimator.model_, generator, steps=steps)
         scores = _evaluate(y_true, predictions, scorer, is_multimetric=True)
 
     else:
-        if hasattr(estimator, 'predict_proba'):
+        if hasattr(estimator, "predict_proba"):
             predictions = estimator.predict_proba(X_test)
         else:
             predictions = estimator.predict(X_test)
 
         y_true = y_test
-        scores = _score(estimator, X_test, y_test, scorer,
-                        is_multimetric=True)
+        scores = _score(estimator, X_test, y_test, scorer, is_multimetric=True)
     if outfile_y_true:
         try:
-            pd.DataFrame(y_true).to_csv(outfile_y_true, sep='\t',
-                                        index=False)
+            pd.DataFrame(y_true).to_csv(outfile_y_true, sep="\t", index=False)
             pd.DataFrame(predictions).astype(np.float32).to_csv(
-                outfile_y_preds, sep='\t', index=False,
-                float_format='%g', chunksize=10000)
+                outfile_y_preds,
+                sep="\t",
+                index=False,
+                float_format="%g",
+                chunksize=10000,
+            )
         except Exception as e:
             print("Error in saving predictions: %s" % e)
 
@@ -437,8 +461,7 @@
         scores[name] = [score]
     df = pd.DataFrame(scores)
     df = df[sorted(df.columns)]
-    df.to_csv(path_or_buf=outfile_result, sep='\t',
-              header=True, index=False)
+    df.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
 
     memory.clear(warn=False)
 
@@ -447,23 +470,22 @@
         if isinstance(estimator, Pipeline):
             main_est = estimator.steps[-1][-1]
 
-        if hasattr(main_est, 'model_') \
-                and hasattr(main_est, 'save_weights'):
+        if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
             if outfile_weights:
                 main_est.save_weights(outfile_weights)
             del main_est.model_
             del main_est.fit_params
             del main_est.model_class_
-            del main_est.validation_data
-            if getattr(main_est, 'data_generator_', None):
+            if getattr(main_est, "validation_data", None):
+                del main_est.validation_data
+            if getattr(main_est, "data_generator_", None):
                 del main_est.data_generator_
 
-        with open(outfile_object, 'wb') as output_handler:
-            pickle.dump(estimator, output_handler,
-                        pickle.HIGHEST_PROTOCOL)
+        with open(outfile_object, "wb") as output_handler:
+            pickle.dump(estimator, output_handler, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -481,11 +503,19 @@
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         outfile_weights=args.outfile_weights,
-         outfile_y_true=args.outfile_y_true,
-         outfile_y_preds=args.outfile_y_preds,
-         groups=args.groups,
-         ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        outfile_weights=args.outfile_weights,
+        outfile_y_true=args.outfile_y_true,
+        outfile_y_preds=args.outfile_y_preds,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 main_macros.xml
--- a/main_macros.xml	Thu Oct 01 20:02:43 2020 +0000
+++ b/main_macros.xml	Tue Apr 13 17:29:01 2021 +0000
@@ -1,1952 +1,1940 @@
 
-  1.0.8.2
+    1.0.8.3
 
-  
-      
-          python
-          Galaxy-ML
-          
-      
-  
+    
+        
+            Galaxy-ML
+            
+        
+    
 
-  
-    
-        
-    
-  
+    
+        
+            
+        
+    
 
 
-  
+    
 
-  
-    
-        
-            
-            
-        
-        
-            
-            
-            
-            
-                
-                    
-                    
-                
-                
-                
-                
-                
-            
-        
-        
-            
-                
-            
-        
-    
-  
+    
+        
+            
+                
+                
+            
+            
+                
+                
+                
+                
+                    
+                        
+                        
+                    
+                    
+                    
+                    
+                    
+                
+            
+            
+                
+                    
+                
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
 
-  
-  
-    
-        
-        
-        
-        
-        
-    
-  
+    
+    
+        
+            
+            
+            
+            
+            
+        
+    
 
-  
-    
-        
-        
-        
-        
-        
-    
-  
+    
+        
+            
+            
+            
+            
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-        
-        
-        
-    
-  
+    
+        
+            
+            
+            
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-    
-    
-    
-    
-    
-    
-    
-        
-        
-        
-        
-        
-        
-    
-    
-  
+    
+        
+        
+        
+        
+        
+        
+        
+        
+            
+            
+            
+            
+            
+            
+        
+        
+    
 
-  
-  
-    
-  
+    
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-        
-        
-    
-  
+    
+        
+            
+            
+            
+        
+    
 
-  
-    
-      
-      
-      
-    
-  
+    
+        
+            
+            
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-      
-      
-      
-      
-      
-      
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+            
+            
+            
+            
+            
+            
+            
+            
+                
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-      
-      
-      
-    
-  
+    
+        
+            
+            
+            
+        
+    
 
-  
-  
-    
-  
-  
-  
-    
-  
+    
+    
+        
+    
+
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
 
-  
-  
-        
-  
+    
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-      
-  
+    
+        
+    
 
-  
-      
-  
+    
+        
+    
 
-  
-      
-  
+    
+        
+    
 
-  
-      
-          
-          
-      
-  
+    
+        
+            
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-      
-    
-  
+    
+        
+            
+            
+            
+            
+            
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
 
-  
+    
+
+    
+        
+        
+        
+            
+        
+        
+        
+        
+            
+        
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
+        
+        
+            
+        
+        
+            
+        
+        
+            
+        
+        
+            
+        
+        
+        
+    
 
-  
-    
-    
-    
-      
-    
-    
-    
-    
-      
-    
-    
-  
+    
+        
+            
+                
+                
+            
+            
+                
+                
+            
+            
+                
+            
+        
+        
+            
+                
+                
+            
+            
+                
+                
+            
+            
+                
+            
+        
+    
+
+    
+        
+        
+        
+            
+        
+        
+        
+        
+            
+        
+    
+
+    
+        
+            
+        
+    
+
+    
+        
+        
+    
+
+    
+        
+            
+            
+        
+    
+
+    
+        
+            
+                
+                
+            
+            
+                
+                    
+                
+                
+                    
+                
+            
+        
+    
+
+    
+        
+            
+            
+            
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-    
-    
-      
-    
-    
-      
-    
-    
-      
-    
-    
-      
-    
-    
-    
-  
+    
+        
+            
+        
+        
+            
+        
+        
+    
+
+    
+        
+        
+        
+            
+        
+    
+
+    
+        
+        
+    
+
+    
+        
+        
+        
+        
+        
+        
+            
+        
+    
+
+    
+    
+        
+            
+            
+                
+                
+            
+            
+                
+                
+                
+                
+            
+            
+            
+            
+            
+        
+    
+
+    
+        
+            
+            
+                
+                
+                
+                
+                
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+        
+    
+
+    
+        
+            
+            
+                
+                
+                
+                
+            
+            
+            
+            
+            
+                
+                
+                
+            
+            
+            
+            
+                
+                
+            
+            
+            
+            
+        
+    
+
+    
+        
+    
+
+    
+        
+    
 
-  
-    
-      
-          
-          
-      
-      
-        
-        
-      
-      
-          
-      
-    
-    
-      
-          
-          
-      
-      
-        
-        
-      
-      
-          
-      
-    
-  
+    
+        
+            
+            
+            
+        
+    
+
+    
+        
+    
+
+    
+        
+            
+            
+            
+            
+                
+                
+                
+                
+            
+            
+        
+    
+
+    
+        
+            
+                
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
+            
+            
+        
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
+            
+        
+    
+
+    
+        
+            
+            
+            
+            
+        
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+    
 
-  
-    
-    
-    
-      
-    
-    
-    
-    
-      
-    
-  
+    
+        
+            
+        
+        
+            
+        
+    
 
-  
-    
-        
-    
-  
+    
+        
+            
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
+            
+            
+        
+    
 
-  
-    
-    
-  
-
-  
-    
-        
-        
-    
-  
+    
+        
+            
+        
+        
+            
+        
+        
+            
+        
+        
+            
+        
+        
+    
 
-  
-    
-        
-            
-            
-        
-        
-            
-                
+    
+        
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
             
-            
-                
+            
+                
+                    
+                    
+                        
+                        
+                    
+                    
+                    
+                    
+                
+            
+            
+                
+                    
+                        
+                        
+                    
+                    
+                
+            
+            
+                
+                    
+                    
+                        
+                        
+                        
+                    
+                    
+                        
+                        
+                        
+                    
+                
             
         
-    
-  
-
-  
-    
-        
-        
-        
-    
-  
+    
 
-  
-    
-        
-    
-    
-        
-    
-    
-  
-
-  
-    
-    
-    
-      
-    
-  
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
 
-  
-    
-    
-  
-
-  
-    
-    
-    
-    
-    
-    
-      
-    
-  
-
-  
-  
-    
-      
-      
-          
-          
-      
-      
-          
-          
-          
-          
-      
-      
-      
-      
-      
-    
-  
+    
+        
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+            
+        
+        
+        
+        
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+        
+        
+            
+        
+        
+            
+            
+            
+        
+        
+            
+            
+            
+        
+        
+    
 
-  
-    
-        
-        
-            
-            
-            
-            
-            
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-    
-  
+    
+        
+            
+                
+                    
+                    
+                    
+                    
+                
+            
+            
+                
+                    
+                    
+                
+                
+                    
+                    
+                    
+                    
+                
+                
+                    
+                
+                
+                    
+                    
+                
+            
+        
+    
+
+    
+        
+            
+                
+            
+            
+        
+    
+
+    
+        
+    
+
+    
+        
+    
+
+    
+        
+    
 
-  
-    
-        
-        
-            
-            
-            
-            
-        
-        
-        
-        
-        
-            
-            
-            
-        
-        
-        
-        
-            
-            
-        
-        
-        
-        
-    
-  
+    
+        
+    
 
-  
-    
-  
+    
+        
+            
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+                
+            
+            
+                
+                
+            
+            
+                
+                
+                
+                
+            
+        
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-      
-      
-      
-    
-  
-
-  
-    
-  
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
 
-  
-    
-      
-      
-      
-      
-          
-          
-          
-          
-      
-      
-    
-  
-
-  
-    
-      
-          
-          
-          
-          
-          
-      
-      
-          
-      
-      
-          
-      
-      
-          
-      
-      
-          
-      
-      
-          
-      
-    
-  
+    
+        
+            
+            
+                
+                    
+                    
+                    
+                    
+                    
+                
+                
+            
+        
+        
+            
+            
+        
+        
+            
+            
+        
+        
+            
+            
+        
+        
+            
+            
+        
+        
+            
+            
+        
+        
+            
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-      
-      
-    
-  
+    
+        
+            
+                
+                    
+                    
+                
+                
+                    
+                
+                
+                    
+                
+            
+            
+        
+    
 
-  
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-  
+    
+        
+            
+                
+                    
+                
+                
+                    
+                
+            
+            
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-      
-    
-  
+    
+        
+    
 
-  
-    
-      
-      
-      
-      
-    
-  
-
-  
-    
-    
-    
-    
-    
-    
-    
-    
-  
+    
+        
+            
+            
+        
+    
 
-  
-    
-      
-    
-    
-      
-    
-  
+    
+        
+            
+            
+        
+    
 
-  
-    
-      
-    
-  
+    
+        
+            
+            
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-    
-  
-
-  
-    
-      
-      
-      
-      
-      
-      
-      
-    
-  
+    
+        
+            
+            
+                
+                    
+                        
+                            
+                            
+                        
+                    
+                
+                
+                
+                
+                
+            
+        
+    
 
-  
-    
-        
-    
-    
-      
-    
-    
-      
-    
-    
-      
-        
-          
-          
-          
+    
+        
+        
+            
+                
+            
+            
+            
+            
+                
+            
+            
+                
+            
+            
+        
+    
+
+    
+        
+            
+                
+                    
+                
+            
+            
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
+
+    
+        
+            
+            
+            
+            
+            
         
-        
-      
-    
-    
-  
+    
 
-  
-    
-      
-        
-      
-      
-          
-      
-      
-          
-      
-      
-          
-      
-      
-          
-              
-              
-                  
-                  
-              
-              
-              
-              
-          
-      
-      
-          
-              
-                  
-                  
-              
-              
-          
-      
-      
-          
-              
-              
-                  
-                  
-                  
-              
-              
-                  
-                  
-                  
-              
-          
-      
-    
-  
-
-  
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-  
-
-  
-    
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-      
-    
-    
-    
-    
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-    
-    
-      
-    
-    
-      
-      
-      
-    
-    
-      
-      
-      
-    
-    
-  
+    
+        
+        
+        
+    
 
-  
-    
-      
-        
-          
-          
-          
-          
-        
-      
-      
-        
-          
-          
-        
-        
-          
-          
-          
-          
-        
-        
-          
-        
-        
-          
-          
-        
-      
-    
-  
-
-  
-    
-      
-        
-      
-      
-    
-  
-
-  
-    
-  
-
-  
-    
-  
-
-  
-    
-  
-
-  
-    
-  
-
-  
-    
-      
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-        
-      
-      
-        
-        
-      
-      
-        
-        
-        
-        
-      
-    
-    
-  
-
-  
-    
-  
-
-  
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-  
-
-  
-    
-      
-      
-        
-          
-          
-          
-          
-          
-        
-        
-      
-    
-    
-      
-      
-    
-    
-      
-      
-    
-    
-      
-      
-    
-    
-      
-      
-    
-    
-      
-      
-    
-    
-      
-    
-  
-
-  
-    
-      
-        
-          
-          
-        
-        
-          
-        
-        
-          
-        
-      
-      
-    
-  
-
-  
-    
-      
-        
-          
-        
-        
-          
-        
-      
-      
-    
-  
-
-  
-    
-  
-
-  
-    
-      
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-      
-    
-  
-
-  
-    
-      
-      
-    
-  
+    
+        
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+            
+        
+    
 
-  
-    
-      
-      
-        
-          
-            
-              
-              
-            
-          
+    
+        
+            
+            
+            
+            
+            
+            
+            
         
-        
-        
-        
-        
-      
-    
-  
+    
 
-  
-    
-    
-      
-        
-      
-      
-      
-      
-        
-        
-      
-        
-      
-      
-    
-  
+    
+        
+            
+            
+        
+    
 
-  
-    
-      
-        
-          
-        
-      
-      
-      
-      
-        
-        
-      
-        
-      
-      
-        
-      
-    
-  
-
-  
-    
-      
-      
-      
-      
-      
-    
-  
-
-  
-    
-    
-    
-  
+    
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-    
-  
+    
+        
+        
+            
+            
+                
+                    
+                        
+                        
+                        
+                    
+                
+                
+                    
+                        
+                            
+                            
+                        
+                    
+                
+            
+        
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+    
 
-  
-    
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-      
-    
-  
-
-  
-    
-      
-      
-      
-      
-      
-      
-      
-    
-  
-
-  
-    
-      
-      
-    
-  
-
-  
-    
-  
-
-  
-    
-    
-      
-      
-          
-            
-              
-              
-              
-            
-          
-          
-            
-              
-                
-                
-              
-            
-          
-      
-    
-  
-
-  
-      
-      
-      
-      
-      
-      
-      
-  
-
-  
-      
-      
-      
-      
-      
-      
-      
-      
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+        
+        
+            
+                
+                
+            
+            
+        
+        
+    
 
-  
-      
-        
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
-          
-          
-          
-          
-          
-          
-          
-          
-        
-        
-      
-      
-        
-          
-          
+    
+        
+            
+                
+            
+            
+        
+    
+
+    
+        
+            
+                
+                    
+                
+            
+            
+                
+                    
+                
+            
+        
+    
+
+    
+        
+            
+                
+                    
+                
+            
         
-        
-      
-      
-  
-
-  
-    
-      
-        
-      
-      
-    
-  
-
-  
-    
-      
-        
-            
-        
-      
-      
-        
-            
-        
-      
-    
-  
+    
 
-  
-    
-      
-        
-          
-        
-      
-    
-  
-
-  
-    
-      
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
-    
-  
+        
+    
 
-  
-    
-      
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-      
-        
-      
-    
-  
+    
+        
+            
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-            
-                
+    
+        
+            
+                
+                    
+                
+            
+            
+                
+                    
+                
             
-        
-        
-            
-                
-            
-        
-    
-  
+        
+    
 
-  
-    
-  
+    
+        
+    
 
-  
-    
-        
-            
-            
-        
-        
-            
-        
-        
-            
-        
-    
-  
+    
+        
+            
+                
+                
+            
+            
+                
+            
+            
+                
+            
+        
+    
 
-  
-    
-    
-  
+    
+        
+        
+    
 
-  
+    
 
-  
-    
-      
-          selected_tasks['selected_task'] == 'load'
-      
-      
-          selected_tasks['selected_task'] == 'train'
-      
-    
-  
+    
+        
+            
+                selected_tasks['selected_task'] == 'load'
+            
+            
+                selected_tasks['selected_task'] == 'train'
+            
+        
+    
 
-  
-  
-    
-        10.5281/zenodo.15094
-    
-  
+    
+    
+        
+            10.5281/zenodo.15094
+        
+    
 
-  
-    
-        
-          @article{scikit-learn,
-            title={Scikit-learn: Machine Learning in {P}ython},
-            author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+    
+        
+            
+          @article{scikit-learn, title={Scikit-learn: Machine Learning in {P}ython}, author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
                     and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
                     and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
-                    Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
-            journal={Journal of Machine Learning Research},
-            volume={12},
-            pages={2825--2830},
-            year={2011}
+                    Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, journal={Journal of Machine Learning Research}, volume={12}, pages={2825--2830}, year={2011}
           }
-        
-        
-    
-  
+            
+            
+        
+    
 
-  
-    
-        
+    
+        
+            
           @Misc{,
           author =    {Eric Jones and Travis Oliphant and Pearu Peterson and others},
           title =     {{SciPy}: Open source scientific tools for {Python}},
@@ -1954,12 +1942,12 @@
           url = "http://www.scipy.org/",
           note = {[Online; accessed 2016-04-09]}
         }
-        
-    
-  
+            
+        
+    
 
-  
-    
+    
+        
       @article{DBLP:journals/corr/abs-1711-08477,
         author    = {Ryan J. Urbanowicz and
                     Randal S. Olson and
@@ -1977,11 +1965,11 @@
         biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1711-08477},
         bibsource = {dblp computer science bibliography, https://dblp.org}
       }
-    
-  
+        
+    
 
-  
-    
+    
+        
       @inproceedings{Chen:2016:XST:2939672.2939785,
         author = {Chen, Tianqi and Guestrin, Carlos},
         title = {{XGBoost}: A Scalable Tree Boosting System},
@@ -1999,11 +1987,11 @@
         address = {New York, NY, USA},
         keywords = {large-scale machine learning},
       }
-    
-  
+        
+    
 
-  
-    
+    
+        
       @article{JMLR:v18:16-365,
         author  = {Guillaume  Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
         title   = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
@@ -2014,22 +2002,14 @@
         pages   = {1-5},
         url     = {http://jmlr.org/papers/v18/16-365.html}
       }
-    
-  
+        
+    
 
-  
-    
-      @article{chen2019selene,
-        title={Selene: a PyTorch-based deep learning library for sequence data},
-        author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G},
-        journal={Nature methods},
-        volume={16},
-        number={4},
-        pages={315},
-        year={2019},
-        publisher={Nature Publishing Group}
+    
+        
+      @article{chen2019selene, title={Selene: a PyTorch-based deep learning library for sequence data}, author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G}, journal={Nature methods}, volume={16}, number={4}, pages={315}, year={2019}, publisher={Nature Publishing Group}
       }
-    
-  
+        
+    
 
 
diff -r 508ce0649bec -r 0a3f113397b2 ml_visualization_ex.py
--- a/ml_visualization_ex.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/ml_visualization_ex.py	Tue Apr 13 17:29:01 2021 +0000
@@ -22,16 +22,16 @@
 
 # plotly default colors
 default_colors = [
-    '#1f77b4',  # muted blue
-    '#ff7f0e',  # safety orange
-    '#2ca02c',  # cooked asparagus green
-    '#d62728',  # brick red
-    '#9467bd',  # muted purple
-    '#8c564b',  # chestnut brown
-    '#e377c2',  # raspberry yogurt pink
-    '#7f7f7f',  # middle gray
-    '#bcbd22',  # curry yellow-green
-    '#17becf'   # blue-teal
+    "#1f77b4",  # muted blue
+    "#ff7f0e",  # safety orange
+    "#2ca02c",  # cooked asparagus green
+    "#d62728",  # brick red
+    "#9467bd",  # muted purple
+    "#8c564b",  # chestnut brown
+    "#e377c2",  # raspberry yogurt pink
+    "#7f7f7f",  # middle gray
+    "#bcbd22",  # curry yellow-green
+    "#17becf",  # blue-teal
 ]
 
 
@@ -52,46 +52,31 @@
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        precision, recall, _ = precision_recall_curve(
-            y_true, y_score, pos_label=pos_label)
-        ap = average_precision_score(
-            y_true, y_score, pos_label=pos_label or 1)
+        precision, recall, _ = precision_recall_curve(y_true, y_score, pos_label=pos_label)
+        ap = average_precision_score(y_true, y_score, pos_label=pos_label or 1)
 
         trace = go.Scatter(
             x=recall,
             y=precision,
-            mode='lines',
-            marker=dict(
-                color=default_colors[idx % len(default_colors)]
-            ),
-            name='%s (area = %.3f)' % (idx, ap)
+            mode="lines",
+            marker=dict(color=default_colors[idx % len(default_colors)]),
+            name="%s (area = %.3f)" % (idx, ap),
         )
         data.append(trace)
 
     layout = go.Layout(
-        xaxis=dict(
-            title='Recall',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
-        yaxis=dict(
-            title='Precision',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
+        xaxis=dict(title="Recall", linecolor="lightslategray", linewidth=1),
+        yaxis=dict(title="Precision", linecolor="lightslategray", linewidth=1),
         title=dict(
-            text=title or 'Precision-Recall Curve',
+            text=title or "Precision-Recall Curve",
             x=0.5,
             y=0.92,
-            xanchor='center',
-            yanchor='top'
+            xanchor="center",
+            yanchor="top",
         ),
-        font=dict(
-            family="sans-serif",
-            size=11
-        ),
+        font=dict(family="sans-serif", size=11),
         # control backgroud colors
-        plot_bgcolor='rgba(255,255,255,0)'
+        plot_bgcolor="rgba(255,255,255,0)",
     )
     """
     legend=dict(
@@ -112,45 +97,47 @@
 
     plotly.offline.plot(fig, filename="output.html", auto_open=False)
     # to be discovered by `from_work_dir`
-    os.rename('output.html', 'output')
+    os.rename("output.html", "output")
 
 
 def visualize_pr_curve_matplotlib(df1, df2, pos_label, title=None):
-    """visualize pr-curve using matplotlib and output svg image
-    """
+    """visualize pr-curve using matplotlib and output svg image"""
     backend = matplotlib.get_backend()
     if "inline" not in backend:
         matplotlib.use("SVG")
-    plt.style.use('seaborn-colorblind')
+    plt.style.use("seaborn-colorblind")
     plt.figure()
 
     for idx in range(df1.shape[1]):
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        precision, recall, _ = precision_recall_curve(
-            y_true, y_score, pos_label=pos_label)
-        ap = average_precision_score(
-            y_true, y_score, pos_label=pos_label or 1)
+        precision, recall, _ = precision_recall_curve(y_true, y_score, pos_label=pos_label)
+        ap = average_precision_score(y_true, y_score, pos_label=pos_label or 1)
 
-        plt.step(recall, precision, 'r-', color="black", alpha=0.3,
-                 lw=1, where="post", label='%s (area = %.3f)' % (idx, ap))
+        plt.step(
+            recall,
+            precision,
+            "r-",
+            color="black",
+            alpha=0.3,
+            lw=1,
+            where="post",
+            label="%s (area = %.3f)" % (idx, ap),
+        )
 
     plt.xlim([0.0, 1.0])
     plt.ylim([0.0, 1.05])
-    plt.xlabel('Recall')
-    plt.ylabel('Precision')
-    title = title or 'Precision-Recall Curve'
+    plt.xlabel("Recall")
+    plt.ylabel("Precision")
+    title = title or "Precision-Recall Curve"
     plt.title(title)
     folder = os.getcwd()
     plt.savefig(os.path.join(folder, "output.svg"), format="svg")
-    os.rename(os.path.join(folder, "output.svg"),
-              os.path.join(folder, "output"))
+    os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))
 
 
-def visualize_roc_curve_plotly(df1, df2, pos_label,
-                               drop_intermediate=True,
-                               title=None):
+def visualize_roc_curve_plotly(df1, df2, pos_label, drop_intermediate=True, title=None):
     """output roc-curve in html using plotly
 
     df1 : pandas.DataFrame
@@ -169,45 +156,31 @@
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label,
-                                drop_intermediate=drop_intermediate)
+        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, drop_intermediate=drop_intermediate)
         roc_auc = auc(fpr, tpr)
 
         trace = go.Scatter(
             x=fpr,
             y=tpr,
-            mode='lines',
-            marker=dict(
-                color=default_colors[idx % len(default_colors)]
-            ),
-            name='%s (area = %.3f)' % (idx, roc_auc)
+            mode="lines",
+            marker=dict(color=default_colors[idx % len(default_colors)]),
+            name="%s (area = %.3f)" % (idx, roc_auc),
         )
         data.append(trace)
 
     layout = go.Layout(
-        xaxis=dict(
-            title='False Positive Rate',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
-        yaxis=dict(
-            title='True Positive Rate',
-            linecolor='lightslategray',
-            linewidth=1
-        ),
+        xaxis=dict(title="False Positive Rate", linecolor="lightslategray", linewidth=1),
+        yaxis=dict(title="True Positive Rate", linecolor="lightslategray", linewidth=1),
         title=dict(
-            text=title or 'Receiver Operating Characteristic (ROC) Curve',
+            text=title or "Receiver Operating Characteristic (ROC) Curve",
             x=0.5,
             y=0.92,
-            xanchor='center',
-            yanchor='top'
+            xanchor="center",
+            yanchor="top",
         ),
-        font=dict(
-            family="sans-serif",
-            size=11
-        ),
+        font=dict(family="sans-serif", size=11),
         # control backgroud colors
-        plot_bgcolor='rgba(255,255,255,0)'
+        plot_bgcolor="rgba(255,255,255,0)",
     )
     """
     # legend=dict(
@@ -229,66 +202,84 @@
 
     plotly.offline.plot(fig, filename="output.html", auto_open=False)
     # to be discovered by `from_work_dir`
-    os.rename('output.html', 'output')
+    os.rename("output.html", "output")
 
 
-def visualize_roc_curve_matplotlib(df1, df2, pos_label,
-                                   drop_intermediate=True,
-                                   title=None):
-    """visualize roc-curve using matplotlib and output svg image
-    """
+def visualize_roc_curve_matplotlib(df1, df2, pos_label, drop_intermediate=True, title=None):
+    """visualize roc-curve using matplotlib and output svg image"""
     backend = matplotlib.get_backend()
     if "inline" not in backend:
         matplotlib.use("SVG")
-    plt.style.use('seaborn-colorblind')
+    plt.style.use("seaborn-colorblind")
     plt.figure()
 
     for idx in range(df1.shape[1]):
         y_true = df1.iloc[:, idx].values
         y_score = df2.iloc[:, idx].values
 
-        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label,
-                                drop_intermediate=drop_intermediate)
+        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, drop_intermediate=drop_intermediate)
         roc_auc = auc(fpr, tpr)
 
-        plt.step(fpr, tpr, 'r-', color="black", alpha=0.3, lw=1,
-                 where="post", label='%s (area = %.3f)' % (idx, roc_auc))
+        plt.step(
+            fpr,
+            tpr,
+            "r-",
+            color="black",
+            alpha=0.3,
+            lw=1,
+            where="post",
+            label="%s (area = %.3f)" % (idx, roc_auc),
+        )
 
     plt.xlim([0.0, 1.0])
     plt.ylim([0.0, 1.05])
-    plt.xlabel('False Positive Rate')
-    plt.ylabel('True Positive Rate')
-    title = title or 'Receiver Operating Characteristic (ROC) Curve'
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    title = title or "Receiver Operating Characteristic (ROC) Curve"
     plt.title(title)
     folder = os.getcwd()
     plt.savefig(os.path.join(folder, "output.svg"), format="svg")
-    os.rename(os.path.join(folder, "output.svg"),
-              os.path.join(folder, "output"))
+    os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))
 
 
 def get_dataframe(file_path, plot_selection, header_name, column_name):
-    header = 'infer' if plot_selection[header_name] else None
+    header = "infer" if plot_selection[header_name] else None
     column_option = plot_selection[column_name]["selected_column_selector_option"]
-    if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
         col = plot_selection[column_name]["col1"]
     else:
         col = None
     _, input_df = read_columns(file_path, c=col,
-                                   c_option=column_option,
-                                   return_df=True,
-                                   sep='\t', header=header,
-                                   parse_dates=True)
+                               c_option=column_option,
+                               return_df=True,
+                               sep='\t', header=header,
+                               parse_dates=True)
     return input_df
 
 
-def main(inputs, infile_estimator=None, infile1=None,
-         infile2=None, outfile_result=None,
-         outfile_object=None, groups=None,
-         ref_seq=None, intervals=None,
-         targets=None, fasta_path=None,
-         model_config=None, true_labels=None,
-         predicted_labels=None, plot_color=None,
-         title=None):
+def main(
+    inputs,
+    infile_estimator=None,
+    infile1=None,
+    infile2=None,
+    outfile_result=None,
+    outfile_object=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+    model_config=None,
+    true_labels=None,
+    predicted_labels=None,
+    plot_color=None,
+    title=None,
+):
     """
     Parameter
     ---------
@@ -341,34 +332,39 @@
     title : str, default is None
         Title of the confusion matrix heatmap
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
-    title = params['plotting_selection']['title'].strip()
-    plot_type = params['plotting_selection']['plot_type']
-    plot_format = params['plotting_selection']['plot_format']
+    title = params["plotting_selection"]["title"].strip()
+    plot_type = params["plotting_selection"]["plot_type"]
+    plot_format = params["plotting_selection"]["plot_format"]
 
-    if plot_type == 'feature_importances':
-        with open(infile_estimator, 'rb') as estimator_handler:
+    if plot_type == "feature_importances":
+        with open(infile_estimator, "rb") as estimator_handler:
             estimator = load_model(estimator_handler)
 
-        column_option = (params['plotting_selection']
-                               ['column_selector_options']
-                               ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = (params['plotting_selection']
-                       ['column_selector_options']['col1'])
+        column_option = params["plotting_selection"]["column_selector_options"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["plotting_selection"]["column_selector_options"]["col1"]
         else:
             c = None
 
-        _, input_df = read_columns(infile1, c=c,
-                                   c_option=column_option,
-                                   return_df=True,
-                                   sep='\t', header='infer',
-                                   parse_dates=True)
+        _, input_df = read_columns(
+            infile1,
+            c=c,
+            c_option=column_option,
+            return_df=True,
+            sep="\t",
+            header="infer",
+            parse_dates=True,
+        )
 
         feature_names = input_df.columns.values
 
@@ -379,16 +375,14 @@
                     feature_names = feature_names[mask]
             estimator = estimator.steps[-1][-1]
 
-        if hasattr(estimator, 'coef_'):
+        if hasattr(estimator, "coef_"):
             coefs = estimator.coef_
         else:
-            coefs = getattr(estimator, 'feature_importances_', None)
+            coefs = getattr(estimator, "feature_importances_", None)
         if coefs is None:
-            raise RuntimeError('The classifier does not expose '
-                               '"coef_" or "feature_importances_" '
-                               'attributes')
+            raise RuntimeError("The classifier does not expose " '"coef_" or "feature_importances_" ' "attributes")
 
-        threshold = params['plotting_selection']['threshold']
+        threshold = params["plotting_selection"]["threshold"]
         if threshold is not None:
             mask = (coefs > threshold) | (coefs < -threshold)
             coefs = coefs[mask]
@@ -397,80 +391,74 @@
         # sort
         indices = np.argsort(coefs)[::-1]
 
-        trace = go.Bar(x=feature_names[indices],
-                       y=coefs[indices])
+        trace = go.Bar(x=feature_names[indices], y=coefs[indices])
         layout = go.Layout(title=title or "Feature Importances")
         fig = go.Figure(data=[trace], layout=layout)
 
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
 
-    elif plot_type in ('pr_curve', 'roc_curve'):
-        df1 = pd.read_csv(infile1, sep='\t', header='infer')
-        df2 = pd.read_csv(infile2, sep='\t', header='infer').astype(np.float32)
+    elif plot_type in ("pr_curve", "roc_curve"):
+        df1 = pd.read_csv(infile1, sep="\t", header="infer")
+        df2 = pd.read_csv(infile2, sep="\t", header="infer").astype(np.float32)
 
-        minimum = params['plotting_selection']['report_minimum_n_positives']
+        minimum = params["plotting_selection"]["report_minimum_n_positives"]
         # filter out columns whose n_positives is beblow the threhold
         if minimum:
             mask = df1.sum(axis=0) >= minimum
             df1 = df1.loc[:, mask]
             df2 = df2.loc[:, mask]
 
-        pos_label = params['plotting_selection']['pos_label'].strip() \
-            or None
+        pos_label = params["plotting_selection"]["pos_label"].strip() or None
 
-        if plot_type == 'pr_curve':
-            if plot_format == 'plotly_html':
+        if plot_type == "pr_curve":
+            if plot_format == "plotly_html":
                 visualize_pr_curve_plotly(df1, df2, pos_label, title=title)
             else:
                 visualize_pr_curve_matplotlib(df1, df2, pos_label, title)
-        else:          # 'roc_curve'
-            drop_intermediate = (params['plotting_selection']
-                                       ['drop_intermediate'])
-            if plot_format == 'plotly_html':
-                visualize_roc_curve_plotly(df1, df2, pos_label,
-                                           drop_intermediate=drop_intermediate,
-                                           title=title)
+        else:  # 'roc_curve'
+            drop_intermediate = params["plotting_selection"]["drop_intermediate"]
+            if plot_format == "plotly_html":
+                visualize_roc_curve_plotly(
+                    df1,
+                    df2,
+                    pos_label,
+                    drop_intermediate=drop_intermediate,
+                    title=title,
+                )
             else:
                 visualize_roc_curve_matplotlib(
-                    df1, df2, pos_label,
+                    df1,
+                    df2,
+                    pos_label,
                     drop_intermediate=drop_intermediate,
-                    title=title)
+                    title=title,
+                )
 
         return 0
 
-    elif plot_type == 'rfecv_gridscores':
-        input_df = pd.read_csv(infile1, sep='\t', header='infer')
+    elif plot_type == "rfecv_gridscores":
+        input_df = pd.read_csv(infile1, sep="\t", header="infer")
         scores = input_df.iloc[:, 0]
-        steps = params['plotting_selection']['steps'].strip()
+        steps = params["plotting_selection"]["steps"].strip()
         steps = safe_eval(steps)
 
         data = go.Scatter(
             x=list(range(len(scores))),
             y=scores,
             text=[str(_) for _ in steps] if steps else None,
-            mode='lines'
+            mode="lines",
         )
         layout = go.Layout(
             xaxis=dict(title="Number of features selected"),
             yaxis=dict(title="Cross validation score"),
-            title=dict(
-                text=title or None,
-                x=0.5,
-                y=0.92,
-                xanchor='center',
-                yanchor='top'
-            ),
-            font=dict(
-                family="sans-serif",
-                size=11
-            ),
+            title=dict(text=title or None, x=0.5, y=0.92, xanchor="center", yanchor="top"),
+            font=dict(family="sans-serif", size=11),
             # control backgroud colors
-            plot_bgcolor='rgba(255,255,255,0)'
+            plot_bgcolor="rgba(255,255,255,0)",
         )
         """
         # legend=dict(
@@ -489,55 +477,43 @@
         """
 
         fig = go.Figure(data=[data], layout=layout)
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
 
-    elif plot_type == 'learning_curve':
-        input_df = pd.read_csv(infile1, sep='\t', header='infer')
-        plot_std_err = params['plotting_selection']['plot_std_err']
+    elif plot_type == "learning_curve":
+        input_df = pd.read_csv(infile1, sep="\t", header="infer")
+        plot_std_err = params["plotting_selection"]["plot_std_err"]
         data1 = go.Scatter(
-            x=input_df['train_sizes_abs'],
-            y=input_df['mean_train_scores'],
-            error_y=dict(
-                array=input_df['std_train_scores']
-            ) if plot_std_err else None,
-            mode='lines',
+            x=input_df["train_sizes_abs"],
+            y=input_df["mean_train_scores"],
+            error_y=dict(array=input_df["std_train_scores"]) if plot_std_err else None,
+            mode="lines",
             name="Train Scores",
         )
         data2 = go.Scatter(
-            x=input_df['train_sizes_abs'],
-            y=input_df['mean_test_scores'],
-            error_y=dict(
-                array=input_df['std_test_scores']
-            ) if plot_std_err else None,
-            mode='lines',
+            x=input_df["train_sizes_abs"],
+            y=input_df["mean_test_scores"],
+            error_y=dict(array=input_df["std_test_scores"]) if plot_std_err else None,
+            mode="lines",
             name="Test Scores",
         )
         layout = dict(
-            xaxis=dict(
-                title='No. of samples'
-            ),
-            yaxis=dict(
-                title='Performance Score'
-            ),
+            xaxis=dict(title="No. of samples"),
+            yaxis=dict(title="Performance Score"),
             # modify these configurations to customize image
             title=dict(
-                text=title or 'Learning Curve',
+                text=title or "Learning Curve",
                 x=0.5,
                 y=0.92,
-                xanchor='center',
-                yanchor='top'
+                xanchor="center",
+                yanchor="top",
             ),
-            font=dict(
-                family="sans-serif",
-                size=11
-            ),
+            font=dict(family="sans-serif", size=11),
             # control backgroud colors
-            plot_bgcolor='rgba(255,255,255,0)'
+            plot_bgcolor="rgba(255,255,255,0)",
         )
         """
         # legend=dict(
@@ -556,27 +532,26 @@
         """
 
         fig = go.Figure(data=[data1, data2], layout=layout)
-        plotly.offline.plot(fig, filename="output.html",
-                            auto_open=False)
+        plotly.offline.plot(fig, filename="output.html", auto_open=False)
         # to be discovered by `from_work_dir`
-        os.rename('output.html', 'output')
+        os.rename("output.html", "output")
 
         return 0
 
-    elif plot_type == 'keras_plot_model':
-        with open(model_config, 'r') as f:
+    elif plot_type == "keras_plot_model":
+        with open(model_config, "r") as f:
             model_str = f.read()
         model = model_from_json(model_str)
         plot_model(model, to_file="output.png")
-        os.rename('output.png', 'output')
+        os.rename("output.png", "output")
 
         return 0
 
-    elif plot_type == 'classification_confusion_matrix':
+    elif plot_type == "classification_confusion_matrix":
         plot_selection = params["plotting_selection"]
         input_true = get_dataframe(true_labels, plot_selection, "header_true", "column_selector_options_true")
-        header_predicted = 'infer' if plot_selection["header_predicted"] else None
-        input_predicted = pd.read_csv(predicted_labels, sep='\t', parse_dates=True, header=header_predicted)
+        header_predicted = "infer" if plot_selection["header_predicted"] else None
+        input_predicted = pd.read_csv(predicted_labels, sep="\t", parse_dates=True, header=header_predicted)
         true_classes = input_true.iloc[:, -1].copy()
         predicted_classes = input_predicted.iloc[:, -1].copy()
         axis_labels = list(set(true_classes))
@@ -586,15 +561,15 @@
         for i in range(len(c_matrix)):
             for j in range(len(c_matrix)):
                 ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
-        ax.set_ylabel('True class labels')
-        ax.set_xlabel('Predicted class labels')
+        ax.set_ylabel("True class labels")
+        ax.set_xlabel("Predicted class labels")
         ax.set_title(title)
         ax.set_xticks(axis_labels)
         ax.set_yticks(axis_labels)
         fig.colorbar(im, ax=ax)
         fig.tight_layout()
         plt.savefig("output.png", dpi=125)
-        os.rename('output.png', 'output')
+        os.rename("output.png", "output")
 
         return 0
 
@@ -603,7 +578,7 @@
     # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -623,11 +598,21 @@
     aparser.add_argument("-pt", "--title", dest="title")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         groups=args.groups, ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path,
-         model_config=args.model_config, true_labels=args.true_labels,
-         predicted_labels=args.predicted_labels,
-         plot_color=args.plot_color,
-         title=args.title)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+        model_config=args.model_config,
+        true_labels=args.true_labels,
+        predicted_labels=args.predicted_labels,
+        plot_color=args.plot_color,
+        title=args.title,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 model_prediction.py
--- a/model_prediction.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/model_prediction.py	Tue Apr 13 17:29:01 2021 +0000
@@ -1,23 +1,29 @@
 import argparse
 import json
+import warnings
+
 import numpy as np
 import pandas as pd
-import warnings
-
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 
-from galaxy_ml.utils import (load_model, read_columns,
-                             get_module, try_get_attr)
+from galaxy_ml.utils import (get_module, load_model,
+                             read_columns, try_get_attr)
+
+
+N_JOBS = int(__import__("os").environ.get("GALAXY_SLOTS", 1))
 
 
-N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
-
-
-def main(inputs, infile_estimator, outfile_predict,
-         infile_weights=None, infile1=None,
-         fasta_path=None, ref_seq=None,
-         vcf_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    outfile_predict,
+    infile_weights=None,
+    infile1=None,
+    fasta_path=None,
+    ref_seq=None,
+    vcf_path=None,
+):
     """
     Parameter
     ---------
@@ -45,96 +51,94 @@
     vcf_path : str
         File path to dataset containing variants info.
     """
-    warnings.filterwarnings('ignore')
+    warnings.filterwarnings("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     # load model
-    with open(infile_estimator, 'rb') as est_handler:
+    with open(infile_estimator, "rb") as est_handler:
         estimator = load_model(est_handler)
 
     main_est = estimator
     if isinstance(estimator, Pipeline):
         main_est = estimator.steps[-1][-1]
-    if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'):
-        if not infile_weights or infile_weights == 'None':
-            raise ValueError("The selected model skeleton asks for weights, "
-                             "but dataset for weights wan not selected!")
+    if hasattr(main_est, "config") and hasattr(main_est, "load_weights"):
+        if not infile_weights or infile_weights == "None":
+            raise ValueError(
+                "The selected model skeleton asks for weights, but dataset for weights was not selected!"
+            )
         main_est.load_weights(infile_weights)
 
     # handle data input
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']
-                               ['column_selector_options_1']
-                               ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
-        df = pd.read_csv(infile1, sep='\t', header=header, parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
 
-        if params['method'] == 'predict':
+        if params["method"] == "predict":
             preds = estimator.predict(X)
         else:
             preds = estimator.predict_proba(X)
 
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
-        if params['method'] == 'predict':
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
+        if params["method"] == "predict":
             preds = estimator.predict(X)
         else:
             preds = estimator.predict_proba(X)
 
     # fasta input
-    elif input_type == 'seq_fasta':
-        if not hasattr(estimator, 'data_batch_generator'):
+    elif input_type == "seq_fasta":
+        if not hasattr(estimator, "data_batch_generator"):
             raise ValueError(
                 "To do prediction on sequences in fasta input, "
                 "the estimator must be a `KerasGBatchClassifier`"
-                "equipped with data_batch_generator!")
-        pyfaidx = get_module('pyfaidx')
+                "equipped with data_batch_generator!"
+            )
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         seq_length = estimator.data_batch_generator.seq_length
-        batch_size = getattr(estimator, 'batch_size', 32)
+        batch_size = getattr(estimator, "batch_size", 32)
         steps = (n_seqs + batch_size - 1) // batch_size
 
-        seq_type = params['input_options']['seq_type']
-        klass = try_get_attr(
-            'galaxy_ml.preprocessors', seq_type)
+        seq_type = params["input_options"]["seq_type"]
+        klass = try_get_attr("galaxy_ml.preprocessors", seq_type)
 
-        pred_data_generator = klass(
-            fasta_path, seq_length=seq_length)
+        pred_data_generator = klass(fasta_path, seq_length=seq_length)
 
-        if params['method'] == 'predict':
-            preds = estimator.predict(
-                X, data_generator=pred_data_generator, steps=steps)
+        if params["method"] == "predict":
+            preds = estimator.predict(X, data_generator=pred_data_generator, steps=steps)
         else:
-            preds = estimator.predict_proba(
-                X, data_generator=pred_data_generator, steps=steps)
+            preds = estimator.predict_proba(X, data_generator=pred_data_generator, steps=steps)
 
     # vcf input
-    elif input_type == 'variant_effect':
-        klass = try_get_attr('galaxy_ml.preprocessors',
-                             'GenomicVariantBatchGenerator')
+    elif input_type == "variant_effect":
+        klass = try_get_attr("galaxy_ml.preprocessors", "GenomicVariantBatchGenerator")
 
-        options = params['input_options']
-        options.pop('selected_input')
-        if options['blacklist_regions'] == 'none':
-            options['blacklist_regions'] = None
+        options = params["input_options"]
+        options.pop("selected_input")
+        if options["blacklist_regions"] == "none":
+            options["blacklist_regions"] = None
 
-        pred_data_generator = klass(
-            ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
+        pred_data_generator = klass(ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
 
         pred_data_generator.set_processing_attrs()
 
@@ -143,9 +147,8 @@
         # predict 1600 sample at once then write to file
         gen_flow = pred_data_generator.flow(batch_size=1600)
 
-        file_writer = open(outfile_predict, 'w')
-        header_row = '\t'.join(['chrom', 'pos', 'name', 'ref',
-                                'alt', 'strand'])
+        file_writer = open(outfile_predict, "w")
+        header_row = "\t".join(["chrom", "pos", "name", "ref", "alt", "strand"])
         file_writer.write(header_row)
         header_done = False
 
@@ -155,23 +158,24 @@
         try:
             while steps_done < len(gen_flow):
                 index_array = next(gen_flow.index_generator)
-                batch_X = gen_flow._get_batches_of_transformed_samples(
-                    index_array)
+                batch_X = gen_flow._get_batches_of_transformed_samples(index_array)
 
-                if params['method'] == 'predict':
+                if params["method"] == "predict":
                     batch_preds = estimator.predict(
                         batch_X,
                         # The presence of `pred_data_generator` below is to
                         # override model carrying data_generator if there
                         # is any.
-                        data_generator=pred_data_generator)
+                        data_generator=pred_data_generator,
+                    )
                 else:
                     batch_preds = estimator.predict_proba(
                         batch_X,
                         # The presence of `pred_data_generator` below is to
                         # override model carrying data_generator if there
                         # is any.
-                        data_generator=pred_data_generator)
+                        data_generator=pred_data_generator,
+                    )
 
                 if batch_preds.ndim == 1:
                     batch_preds = batch_preds[:, np.newaxis]
@@ -181,12 +185,12 @@
 
                 if not header_done:
                     heads = np.arange(batch_preds.shape[-1]).astype(str)
-                    heads_str = '\t'.join(heads)
+                    heads_str = "\t".join(heads)
                     file_writer.write("\t%s\n" % heads_str)
                     header_done = True
 
                 for row in batch_out:
-                    row_str = '\t'.join(row)
+                    row_str = "\t".join(row)
                     file_writer.write("%s\n" % row_str)
 
                 steps_done += 1
@@ -200,14 +204,14 @@
 
     # output
     if len(preds.shape) == 1:
-        rval = pd.DataFrame(preds, columns=['Predicted'])
+        rval = pd.DataFrame(preds, columns=["Predicted"])
     else:
         rval = pd.DataFrame(preds)
 
-    rval.to_csv(outfile_predict, sep='\t', header=True, index=False)
+    rval.to_csv(outfile_predict, sep="\t", header=True, index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator")
@@ -219,7 +223,13 @@
     aparser.add_argument("-v", "--vcf_path", dest="vcf_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.outfile_predict,
-         infile_weights=args.infile_weights, infile1=args.infile1,
-         fasta_path=args.fasta_path, ref_seq=args.ref_seq,
-         vcf_path=args.vcf_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.outfile_predict,
+        infile_weights=args.infile_weights,
+        infile1=args.infile1,
+        fasta_path=args.fasta_path,
+        ref_seq=args.ref_seq,
+        vcf_path=args.vcf_path,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 pca.py
--- a/pca.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/pca.py	Tue Apr 13 17:29:01 2021 +0000
@@ -1,98 +1,185 @@
 import argparse
+
 import numpy as np
-from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA
 from galaxy_ml.utils import read_columns
+from sklearn.decomposition import IncrementalPCA, KernelPCA, PCA
+
 
 def main():
-    parser = argparse.ArgumentParser(description='RDKit screen')
-    parser.add_argument('-i', '--infile',
-                        help="Input file")
-    parser.add_argument('--header', action='store_true', help="Include the header row or skip it")
-    parser.add_argument('-c', '--columns', type=str.lower, default='all', choices=['by_index_number', 'all_but_by_index_number',\
-                        'by_header_name', 'all_but_by_header_name', 'all_columns'],
-                        help="Choose to select all columns, or exclude/include some")
-    parser.add_argument('-ci', '--column_indices', type=str.lower,
-                        help="Choose to select all columns, or exclude/include some")
-    parser.add_argument('-n', '--number', nargs='?', type=int, default=None,\
-                        help="Number of components to keep. If not set, all components are kept")
-    parser.add_argument('--whiten', action='store_true', help="Whiten the components")
-    parser.add_argument('-t', '--pca_type', type=str.lower, default='classical', choices=['classical', 'incremental', 'kernel'],
-                        help="Choose which flavour of PCA to use")
-    parser.add_argument('-s', '--svd_solver', type=str.lower, default='auto', choices=['auto', 'full', 'arpack', 'randomized'],
-                        help="Choose the type of svd solver.")
-    parser.add_argument('-b', '--batch_size', nargs='?', type=int, default=None,\
-                        help="The number of samples to use for each batch")
-    parser.add_argument('-k', '--kernel', type=str.lower, default='linear',\
-                        choices=['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'],
-                        help="Choose the type of kernel.")
-    parser.add_argument('-g', '--gamma', nargs='?', type=float, default=None,
-                        help='Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels')
-    parser.add_argument('-tol', '--tolerance', type=float, default=0.0,
-                        help='Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack')
-    parser.add_argument('-mi', '--max_iter', nargs='?', type=int, default=None,\
-                        help="Maximum number of iterations for arpack")
-    parser.add_argument('-d', '--degree', type=int, default=3,\
-                        help="Degree for poly kernels. Ignored by other kernels")
-    parser.add_argument('-cf', '--coef0', type=float, default=1.0,
-                        help='Independent term in poly and sigmoid kernels')
-    parser.add_argument('-e', '--eigen_solver', type=str.lower, default='auto', choices=['auto', 'dense', 'arpack'],
-                        help="Choose the type of eigen solver.")
-    parser.add_argument('-o', '--outfile',
-                        help="Base name for output file (no extension).")
+    parser = argparse.ArgumentParser(description="Principal component analysis (PCA)")
+    parser.add_argument("-i", "--infile", help="Input file")
+    parser.add_argument(
+        "--header", action="store_true", help="Include the header row or skip it"
+    )
+    parser.add_argument(
+        "-c",
+        "--columns",
+        type=str.lower,
+        default="all_columns",
+        choices=[
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+            "all_columns",
+        ],
+        help="Choose to select all columns, or exclude/include some",
+    )
+    parser.add_argument(
+        "-ci",
+        "--column_indices",
+        type=str.lower,
+        help="Choose to select all columns, or exclude/include some",
+    )
+    parser.add_argument(
+        "-n",
+        "--number",
+        nargs="?",
+        type=int,
+        default=None,
+        help="Number of components to keep. If not set, all components are kept",
+    )
+    parser.add_argument("--whiten", action="store_true", help="Whiten the components")
+    parser.add_argument(
+        "-t",
+        "--pca_type",
+        type=str.lower,
+        default="classical",
+        choices=["classical", "incremental", "kernel"],
+        help="Choose which flavour of PCA to use",
+    )
+    parser.add_argument(
+        "-s",
+        "--svd_solver",
+        type=str.lower,
+        default="auto",
+        choices=["auto", "full", "arpack", "randomized"],
+        help="Choose the type of svd solver.",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch_size",
+        nargs="?",
+        type=int,
+        default=None,
+        help="The number of samples to use for each batch",
+    )
+    parser.add_argument(
+        "-k",
+        "--kernel",
+        type=str.lower,
+        default="linear",
+        choices=["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"],
+        help="Choose the type of kernel.",
+    )
+    parser.add_argument(
+        "-g",
+        "--gamma",
+        nargs="?",
+        type=float,
+        default=None,
+        help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels",
+    )
+    parser.add_argument(
+        "-tol",
+        "--tolerance",
+        type=float,
+        default=0.0,
+        help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack",
+    )
+    parser.add_argument(
+        "-mi",
+        "--max_iter",
+        nargs="?",
+        type=int,
+        default=None,
+        help="Maximum number of iterations for arpack",
+    )
+    parser.add_argument(
+        "-d",
+        "--degree",
+        type=int,
+        default=3,
+        help="Degree for poly kernels. Ignored by other kernels",
+    )
+    parser.add_argument(
+        "-cf",
+        "--coef0",
+        type=float,
+        default=1.0,
+        help="Independent term in poly and sigmoid kernels",
+    )
+    parser.add_argument(
+        "-e",
+        "--eigen_solver",
+        type=str.lower,
+        default="auto",
+        choices=["auto", "dense", "arpack"],
+        help="Choose the type of eigen solver.",
+    )
+    parser.add_argument(
+        "-o", "--outfile", help="Base name for output file (no extension)."
+    )
     args = parser.parse_args()
 
     usecols = None
-    cols = []
     pca_params = {}
 
-    if args.columns == 'by_index_number' or args.columns == 'all_but_by_index_number':
-        usecols = [int(i) for i in args.column_indices.split(',')]
-    elif args.columns == 'by_header_name' or args.columns == 'all_but_by_header_name':
+    if args.columns == "by_index_number" or args.columns == "all_but_by_index_number":
+        usecols = [int(i) for i in args.column_indices.split(",")]
+    elif args.columns == "by_header_name" or args.columns == "all_but_by_header_name":
         usecols = args.column_indices
 
-    header = 'infer' if args.header else None
+    header = "infer" if args.header else None
 
     pca_input = read_columns(
         f=args.infile,
         c=usecols,
         c_option=args.columns,
-        sep='\t',
+        sep="\t",
         header=header,
         parse_dates=True,
         encoding=None,
-        index_col=None)
+        index_col=None,
+    )
 
-    pca_params.update({'n_components': args.number})
+    pca_params.update({"n_components": args.number})
 
-    if args.pca_type == 'classical':
-        pca_params.update({'svd_solver': args.svd_solver, 'whiten': args.whiten})
-        if args.svd_solver == 'arpack':
-            pca_params.update({'tol': args.tolerance})
+    if args.pca_type == "classical":
+        pca_params.update({"svd_solver": args.svd_solver, "whiten": args.whiten})
+        if args.svd_solver == "arpack":
+            pca_params.update({"tol": args.tolerance})
         pca = PCA()
 
-    elif args.pca_type == 'incremental':
-        pca_params.update({'batch_size': args.batch_size, 'whiten': args.whiten})
+    elif args.pca_type == "incremental":
+        pca_params.update({"batch_size": args.batch_size, "whiten": args.whiten})
         pca = IncrementalPCA()
 
-    elif args.pca_type == 'kernel':
-        pca_params.update({'kernel': args.kernel, 'eigen_solver': args.eigen_solver, 'gamma': args.gamma})
+    elif args.pca_type == "kernel":
+        pca_params.update(
+            {
+                "kernel": args.kernel,
+                "eigen_solver": args.eigen_solver,
+                "gamma": args.gamma,
+            }
+        )
 
-        if args.kernel == 'poly':
-            pca_params.update({'degree': args.degree, 'coef0': args.coef0})
-        elif args.kernel == 'sigmoid':
-            pca_params.update({'coef0': args.coef0})
-        elif args.kernel == 'precomputed':
+        if args.kernel == "poly":
+            pca_params.update({"degree": args.degree, "coef0": args.coef0})
+        elif args.kernel == "sigmoid":
+            pca_params.update({"coef0": args.coef0})
+        elif args.kernel == "precomputed":
             pca_input = np.dot(pca_input, pca_input.T)
 
-        if args.eigen_solver == 'arpack':
-            pca_params.update({'tol': args.tolerance, 'max_iter': args.max_iter})
+        if args.eigen_solver == "arpack":
+            pca_params.update({"tol": args.tolerance, "max_iter": args.max_iter})
 
         pca = KernelPCA()
 
     print(pca_params)
     pca.set_params(**pca_params)
     pca_output = pca.fit_transform(pca_input)
-    np.savetxt(fname=args.outfile, X=pca_output, fmt='%.4f', delimiter='\t')
+    np.savetxt(fname=args.outfile, X=pca_output, fmt="%.4f", delimiter="\t")
 
 
 if __name__ == "__main__":
diff -r 508ce0649bec -r 0a3f113397b2 search_model_validation.py
--- a/search_model_validation.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/search_model_validation.py	Tue Apr 13 17:29:01 2021 +0000
@@ -11,45 +11,57 @@
 import sys
 import warnings
 from scipy.io import mmread
-from sklearn import (cluster, decomposition, feature_selection,
-                     kernel_approximation, model_selection, preprocessing)
+from sklearn import (
+    cluster,
+    decomposition,
+    feature_selection,
+    kernel_approximation,
+    model_selection,
+    preprocessing,
+)
 from sklearn.exceptions import FitFailedWarning
 from sklearn.model_selection._validation import _score, cross_validate
 from sklearn.model_selection import _search, _validation
 from sklearn.pipeline import Pipeline
 
-from galaxy_ml.utils import (SafeEval, get_cv, get_scoring, load_model,
-                             read_columns, try_get_attr, get_module,
-                             clean_params, get_main_estimator)
+from galaxy_ml.utils import (
+    SafeEval,
+    get_cv,
+    get_scoring,
+    load_model,
+    read_columns,
+    try_get_attr,
+    get_module,
+    clean_params,
+    get_main_estimator,
+)
 
 
-_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
-setattr(_search, '_fit_and_score', _fit_and_score)
-setattr(_validation, '_fit_and_score', _fit_and_score)
+_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
+setattr(_search, "_fit_and_score", _fit_and_score)
+setattr(_validation, "_fit_and_score", _fit_and_score)
 
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
+N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
 # handle  disk cache
-CACHE_DIR = os.path.join(os.getcwd(), 'cached')
+CACHE_DIR = os.path.join(os.getcwd(), "cached")
 del os
-NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
-                  'nthread', 'callbacks')
+NON_SEARCHABLE = ("n_jobs", "pre_dispatch", "memory", "_path", "nthread", "callbacks")
 
 
 def _eval_search_params(params_builder):
     search_params = {}
 
-    for p in params_builder['param_set']:
-        search_list = p['sp_list'].strip()
-        if search_list == '':
+    for p in params_builder["param_set"]:
+        search_list = p["sp_list"].strip()
+        if search_list == "":
             continue
 
-        param_name = p['sp_name']
+        param_name = p["sp_name"]
         if param_name.lower().endswith(NON_SEARCHABLE):
-            print("Warning: `%s` is not eligible for search and was "
-                  "omitted!" % param_name)
+            print("Warning: `%s` is not eligible for search and was " "omitted!" % param_name)
             continue
 
-        if not search_list.startswith(':'):
+        if not search_list.startswith(":"):
             safe_eval = SafeEval(load_scipy=True, load_numpy=True)
             ev = safe_eval(search_list)
             search_params[param_name] = ev
@@ -60,26 +72,27 @@
             # TODO maybe add regular express check
             ev = safe_eval_es(search_list)
             preprocessings = (
-                preprocessing.StandardScaler(), preprocessing.Binarizer(),
+                preprocessing.StandardScaler(),
+                preprocessing.Binarizer(),
                 preprocessing.MaxAbsScaler(),
-                preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
+                preprocessing.Normalizer(),
+                preprocessing.MinMaxScaler(),
                 preprocessing.PolynomialFeatures(),
-                preprocessing.RobustScaler(), feature_selection.SelectKBest(),
+                preprocessing.RobustScaler(),
+                feature_selection.SelectKBest(),
                 feature_selection.GenericUnivariateSelect(),
                 feature_selection.SelectPercentile(),
-                feature_selection.SelectFpr(), feature_selection.SelectFdr(),
+                feature_selection.SelectFpr(),
+                feature_selection.SelectFdr(),
                 feature_selection.SelectFwe(),
                 feature_selection.VarianceThreshold(),
                 decomposition.FactorAnalysis(random_state=0),
                 decomposition.FastICA(random_state=0),
                 decomposition.IncrementalPCA(),
                 decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
-                decomposition.LatentDirichletAllocation(
-                    random_state=0, n_jobs=N_JOBS),
-                decomposition.MiniBatchDictionaryLearning(
-                    random_state=0, n_jobs=N_JOBS),
-                decomposition.MiniBatchSparsePCA(
-                    random_state=0, n_jobs=N_JOBS),
+                decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
+                decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
+                decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS),
                 decomposition.NMF(random_state=0),
                 decomposition.PCA(random_state=0),
                 decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
@@ -94,59 +107,48 @@
                 skrebate.SURFstar(n_jobs=N_JOBS),
                 skrebate.MultiSURF(n_jobs=N_JOBS),
                 skrebate.MultiSURFstar(n_jobs=N_JOBS),
-                imblearn.under_sampling.ClusterCentroids(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.CondensedNearestNeighbour(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.EditedNearestNeighbours(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
-                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
                 imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.InstanceHardnessThreshold(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.NearMiss(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.NeighbourhoodCleaningRule(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.OneSidedSelection(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.under_sampling.RandomUnderSampler(
-                    random_state=0),
-                imblearn.under_sampling.TomekLinks(
-                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.RandomUnderSampler(random_state=0),
+                imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
                 imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                 imblearn.over_sampling.RandomOverSampler(random_state=0),
                 imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                 imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
-                imblearn.over_sampling.BorderlineSMOTE(
-                    random_state=0, n_jobs=N_JOBS),
-                imblearn.over_sampling.SMOTENC(
-                    categorical_features=[], random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
                 imblearn.combine.SMOTEENN(random_state=0),
-                imblearn.combine.SMOTETomek(random_state=0))
+                imblearn.combine.SMOTETomek(random_state=0),
+            )
             newlist = []
             for obj in ev:
                 if obj is None:
                     newlist.append(None)
-                elif obj == 'all_0':
+                elif obj == "all_0":
                     newlist.extend(preprocessings[0:35])
-                elif obj == 'sk_prep_all':      # no KernalCenter()
+                elif obj == "sk_prep_all":  # no KernalCenter()
                     newlist.extend(preprocessings[0:7])
-                elif obj == 'fs_all':
+                elif obj == "fs_all":
                     newlist.extend(preprocessings[7:14])
-                elif obj == 'decomp_all':
+                elif obj == "decomp_all":
                     newlist.extend(preprocessings[14:25])
-                elif obj == 'k_appr_all':
+                elif obj == "k_appr_all":
                     newlist.extend(preprocessings[25:29])
-                elif obj == 'reb_all':
+                elif obj == "reb_all":
                     newlist.extend(preprocessings[30:35])
-                elif obj == 'imb_all':
+                elif obj == "imb_all":
                     newlist.extend(preprocessings[35:54])
                 elif type(obj) is int and -1 < obj < len(preprocessings):
                     newlist.append(preprocessings[obj])
-                elif hasattr(obj, 'get_params'):       # user uploaded object
-                    if 'n_jobs' in obj.get_params():
+                elif hasattr(obj, "get_params"):  # user uploaded object
+                    if "n_jobs" in obj.get_params():
                         newlist.append(obj.set_params(n_jobs=N_JOBS))
                     else:
                         newlist.append(obj)
@@ -158,9 +160,17 @@
     return search_params
 
 
-def _handle_X_y(estimator, params, infile1, infile2, loaded_df={},
-                ref_seq=None, intervals=None, targets=None,
-                fasta_path=None):
+def _handle_X_y(
+    estimator,
+    params,
+    infile1,
+    infile2,
+    loaded_df={},
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+):
     """read inputs
 
     Params
@@ -192,15 +202,18 @@
     """
     estimator_params = estimator.get_params()
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
@@ -209,25 +222,23 @@
         if df_key in loaded_df:
             infile1 = loaded_df[df_key]
 
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # fasta_file input
-    elif input_type == 'seq_fasta':
-        pyfaidx = get_module('pyfaidx')
+    elif input_type == "seq_fasta":
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         for param in estimator_params.keys():
-            if param.endswith('fasta_path'):
-                estimator.set_params(
-                    **{param: fasta_path})
+            if param.endswith("fasta_path"):
+                estimator.set_params(**{param: fasta_path})
                 break
         else:
             raise ValueError(
@@ -236,25 +247,29 @@
                 "KerasGBatchClassifier with "
                 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
                 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
-                "in pipeline!")
+                "in pipeline!"
+            )
 
-    elif input_type == 'refseq_and_interval':
+    elif input_type == "refseq_and_interval":
         path_params = {
-            'data_batch_generator__ref_genome_path': ref_seq,
-            'data_batch_generator__intervals_path': intervals,
-            'data_batch_generator__target_path': targets
+            "data_batch_generator__ref_genome_path": ref_seq,
+            "data_batch_generator__intervals_path": intervals,
+            "data_batch_generator__target_path": targets,
         }
         estimator.set_params(**path_params)
         n_intervals = sum(1 for line in open(intervals))
         X = np.arange(n_intervals)[:, np.newaxis]
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -262,30 +277,21 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
-    if input_type == 'refseq_and_interval':
-        estimator.set_params(
-            data_batch_generator__features=y.ravel().tolist())
+    if input_type == "refseq_and_interval":
+        estimator.set_params(data_batch_generator__features=y.ravel().tolist())
         y = None
     # end y
 
     return estimator, X, y
 
 
-def _do_outer_cv(searcher, X, y, outer_cv, scoring, error_score='raise',
-                 outfile=None):
+def _do_outer_cv(searcher, X, y, outer_cv, scoring, error_score="raise", outfile=None):
     """Do outer cross-validation for nested CV
 
     Parameters
@@ -305,21 +311,31 @@
     outfile : str
         File path to store the restuls
     """
-    if error_score == 'raise':
+    if error_score == "raise":
         rval = cross_validate(
-            searcher, X, y, scoring=scoring,
-            cv=outer_cv, n_jobs=N_JOBS, verbose=0,
-            error_score=error_score)
+            searcher,
+            X,
+            y,
+            scoring=scoring,
+            cv=outer_cv,
+            n_jobs=N_JOBS,
+            verbose=0,
+            error_score=error_score,
+        )
     else:
-        warnings.simplefilter('always', FitFailedWarning)
+        warnings.simplefilter("always", FitFailedWarning)
         with warnings.catch_warnings(record=True) as w:
             try:
                 rval = cross_validate(
-                    searcher, X, y,
+                    searcher,
+                    X,
+                    y,
                     scoring=scoring,
-                    cv=outer_cv, n_jobs=N_JOBS,
+                    cv=outer_cv,
+                    n_jobs=N_JOBS,
                     verbose=0,
-                    error_score=error_score)
+                    error_score=error_score,
+                )
             except ValueError:
                 pass
             for warning in w:
@@ -327,55 +343,57 @@
 
     keys = list(rval.keys())
     for k in keys:
-        if k.startswith('test'):
-            rval['mean_' + k] = np.mean(rval[k])
-            rval['std_' + k] = np.std(rval[k])
-        if k.endswith('time'):
+        if k.startswith("test"):
+            rval["mean_" + k] = np.mean(rval[k])
+            rval["std_" + k] = np.std(rval[k])
+        if k.endswith("time"):
             rval.pop(k)
     rval = pd.DataFrame(rval)
     rval = rval[sorted(rval.columns)]
-    rval.to_csv(path_or_buf=outfile, sep='\t', header=True, index=False)
+    rval.to_csv(path_or_buf=outfile, sep="\t", header=True, index=False)
 
 
-def _do_train_test_split_val(searcher, X, y, params, error_score='raise',
-                             primary_scoring=None, groups=None,
-                             outfile=None):
-    """ do train test split, searchCV validates on the train and then use
+def _do_train_test_split_val(
+    searcher,
+    X,
+    y,
+    params,
+    error_score="raise",
+    primary_scoring=None,
+    groups=None,
+    outfile=None,
+):
+    """do train test split, searchCV validates on the train and then use
     the best_estimator_ to evaluate on the test
 
     Returns
     --------
     Fitted SearchCV object
     """
-    train_test_split = try_get_attr(
-        'galaxy_ml.model_validations', 'train_test_split')
-    split_options = params['outer_split']
+    train_test_split = try_get_attr("galaxy_ml.model_validations", "train_test_split")
+    split_options = params["outer_split"]
 
     # splits
-    if split_options['shuffle'] == 'stratified':
-        split_options['labels'] = y
+    if split_options["shuffle"] == "stratified":
+        split_options["labels"] = y
         X, X_test, y, y_test = train_test_split(X, y, **split_options)
-    elif split_options['shuffle'] == 'group':
+    elif split_options["shuffle"] == "group":
         if groups is None:
-            raise ValueError("No group based CV option was choosen for "
-                             "group shuffle!")
-        split_options['labels'] = groups
+            raise ValueError("No group based CV option was choosen for " "group shuffle!")
+        split_options["labels"] = groups
         if y is None:
-            X, X_test, groups, _ =\
-                train_test_split(X, groups, **split_options)
+            X, X_test, groups, _ = train_test_split(X, groups, **split_options)
         else:
-            X, X_test, y, y_test, groups, _ =\
-                train_test_split(X, y, groups, **split_options)
+            X, X_test, y, y_test, groups, _ = train_test_split(X, y, groups, **split_options)
     else:
-        if split_options['shuffle'] == 'None':
-            split_options['shuffle'] = None
-        X, X_test, y, y_test =\
-            train_test_split(X, y, **split_options)
+        if split_options["shuffle"] == "None":
+            split_options["shuffle"] = None
+        X, X_test, y, y_test = train_test_split(X, y, **split_options)
 
-    if error_score == 'raise':
+    if error_score == "raise":
         searcher.fit(X, y, groups=groups)
     else:
-        warnings.simplefilter('always', FitFailedWarning)
+        warnings.simplefilter("always", FitFailedWarning)
         with warnings.catch_warnings(record=True) as w:
             try:
                 searcher.fit(X, y, groups=groups)
@@ -390,33 +408,38 @@
     else:
         is_multimetric = False
 
-    best_estimator_ = getattr(searcher, 'best_estimator_')
+    best_estimator_ = getattr(searcher, "best_estimator_")
 
     # TODO Solve deep learning models in pipeline
-    if best_estimator_.__class__.__name__ == 'KerasGBatchClassifier':
-        test_score = best_estimator_.evaluate(
-            X_test, scorer=scorer_, is_multimetric=is_multimetric)
+    if best_estimator_.__class__.__name__ == "KerasGBatchClassifier":
+        test_score = best_estimator_.evaluate(X_test, scorer=scorer_, is_multimetric=is_multimetric)
     else:
-        test_score = _score(best_estimator_, X_test,
-                            y_test, scorer_,
-                            is_multimetric=is_multimetric)
+        test_score = _score(best_estimator_, X_test, y_test, scorer_, is_multimetric=is_multimetric)
 
     if not is_multimetric:
         test_score = {primary_scoring: test_score}
     for key, value in test_score.items():
         test_score[key] = [value]
     result_df = pd.DataFrame(test_score)
-    result_df.to_csv(path_or_buf=outfile, sep='\t', header=True,
-                     index=False)
+    result_df.to_csv(path_or_buf=outfile, sep="\t", header=True, index=False)
 
     return searcher
 
 
-def main(inputs, infile_estimator, infile1, infile2,
-         outfile_result, outfile_object=None,
-         outfile_weights=None, groups=None,
-         ref_seq=None, intervals=None, targets=None,
-         fasta_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    infile1,
+    infile2,
+    outfile_result,
+    outfile_object=None,
+    outfile_weights=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+):
     """
     Parameter
     ---------
@@ -456,154 +479,174 @@
     fasta_path : str
         File path to dataset containing fasta file
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
     # store read dataframe object
     loaded_df = {}
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     # Override the refit parameter
-    params['search_schemes']['options']['refit'] = True \
-        if params['save'] != 'nope' else False
+    params["search_schemes"]["options"]["refit"] = True if params["save"] != "nope" else False
 
-    with open(infile_estimator, 'rb') as estimator_handler:
+    with open(infile_estimator, "rb") as estimator_handler:
         estimator = load_model(estimator_handler)
 
-    optimizer = params['search_schemes']['selected_search_scheme']
+    optimizer = params["search_schemes"]["selected_search_scheme"]
     optimizer = getattr(model_selection, optimizer)
 
     # handle gridsearchcv options
-    options = params['search_schemes']['options']
+    options = params["search_schemes"]["options"]
 
     if groups:
-        header = 'infer' if (options['cv_selector']['groups_selector']
-                                    ['header_g']) else None
-        column_option = (options['cv_selector']['groups_selector']
-                                ['column_selector_options_g']
-                                ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = (options['cv_selector']['groups_selector']
-                        ['column_selector_options_g']['col_g'])
+        header = "infer" if (options["cv_selector"]["groups_selector"]["header_g"]) else None
+        column_option = options["cv_selector"]["groups_selector"]["column_selector_options_g"][
+            "selected_column_selector_option_g"
+        ]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = options["cv_selector"]["groups_selector"]["column_selector_options_g"]["col_g"]
         else:
             c = None
 
         df_key = groups + repr(header)
 
-        groups = pd.read_csv(groups, sep='\t', header=header,
-                             parse_dates=True)
+        groups = pd.read_csv(groups, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = groups
 
         groups = read_columns(
-                groups,
-                c=c,
-                c_option=column_option,
-                sep='\t',
-                header=header,
-                parse_dates=True)
+            groups,
+            c=c,
+            c_option=column_option,
+            sep="\t",
+            header=header,
+            parse_dates=True,
+        )
         groups = groups.ravel()
-        options['cv_selector']['groups_selector'] = groups
+        options["cv_selector"]["groups_selector"] = groups
 
-    splitter, groups = get_cv(options.pop('cv_selector'))
-    options['cv'] = splitter
-    primary_scoring = options['scoring']['primary_scoring']
-    options['scoring'] = get_scoring(options['scoring'])
-    if options['error_score']:
-        options['error_score'] = 'raise'
+    splitter, groups = get_cv(options.pop("cv_selector"))
+    options["cv"] = splitter
+    primary_scoring = options["scoring"]["primary_scoring"]
+    # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
+    # Check if secondary_scoring is specified
+    secondary_scoring = options["scoring"].get("secondary_scoring", None)
+    if secondary_scoring is not None:
+        # If secondary_scoring is specified, convert the list into a comma-separated string
+        options["scoring"]["secondary_scoring"] = ",".join(options["scoring"]["secondary_scoring"])
+    options["scoring"] = get_scoring(options["scoring"])
+    if options["error_score"]:
+        options["error_score"] = "raise"
     else:
-        options['error_score'] = np.NaN
-    if options['refit'] and isinstance(options['scoring'], dict):
-        options['refit'] = primary_scoring
-    if 'pre_dispatch' in options and options['pre_dispatch'] == '':
-        options['pre_dispatch'] = None
+        options["error_score"] = np.NaN
+    if options["refit"] and isinstance(options["scoring"], dict):
+        options["refit"] = primary_scoring
+    if "pre_dispatch" in options and options["pre_dispatch"] == "":
+        options["pre_dispatch"] = None
 
-    params_builder = params['search_schemes']['search_params_builder']
+    params_builder = params["search_schemes"]["search_params_builder"]
     param_grid = _eval_search_params(params_builder)
 
     estimator = clean_params(estimator)
 
     # save the SearchCV object without fit
-    if params['save'] == 'save_no_fit':
+    if params["save"] == "save_no_fit":
         searcher = optimizer(estimator, param_grid, **options)
         print(searcher)
-        with open(outfile_object, 'wb') as output_handler:
-            pickle.dump(searcher, output_handler,
-                        pickle.HIGHEST_PROTOCOL)
+        with open(outfile_object, "wb") as output_handler:
+            pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL)
         return 0
 
     # read inputs and loads new attributes, like paths
-    estimator, X, y = _handle_X_y(estimator, params, infile1, infile2,
-                                  loaded_df=loaded_df, ref_seq=ref_seq,
-                                  intervals=intervals, targets=targets,
-                                  fasta_path=fasta_path)
+    estimator, X, y = _handle_X_y(
+        estimator,
+        params,
+        infile1,
+        infile2,
+        loaded_df=loaded_df,
+        ref_seq=ref_seq,
+        intervals=intervals,
+        targets=targets,
+        fasta_path=fasta_path,
+    )
 
     # cache iraps_core fits could increase search speed significantly
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
     main_est = get_main_estimator(estimator)
-    if main_est.__class__.__name__ == 'IRAPSClassifier':
+    if main_est.__class__.__name__ == "IRAPSClassifier":
         main_est.set_params(memory=memory)
 
     searcher = optimizer(estimator, param_grid, **options)
 
-    split_mode = params['outer_split'].pop('split_mode')
+    split_mode = params["outer_split"].pop("split_mode")
 
-    if split_mode == 'nested_cv':
+    if split_mode == "nested_cv":
         # make sure refit is choosen
         # this could be True for sklearn models, but not the case for
         # deep learning models
-        if not options['refit'] and \
-                not all(hasattr(estimator, attr)
-                        for attr in ('config', 'model_type')):
+        if not options["refit"] and not all(hasattr(estimator, attr) for attr in ("config", "model_type")):
             warnings.warn("Refit is change to `True` for nested validation!")
-            setattr(searcher, 'refit', True)
+            setattr(searcher, "refit", True)
 
-        outer_cv, _ = get_cv(params['outer_split']['cv_selector'])
+        outer_cv, _ = get_cv(params["outer_split"]["cv_selector"])
         # nested CV, outer cv using cross_validate
-        if options['error_score'] == 'raise':
+        if options["error_score"] == "raise":
             rval = cross_validate(
-                searcher, X, y, scoring=options['scoring'],
-                cv=outer_cv, n_jobs=N_JOBS,
-                verbose=options['verbose'],
-                return_estimator=(params['save'] == 'save_estimator'),
-                error_score=options['error_score'],
-                return_train_score=True)
+                searcher,
+                X,
+                y,
+                scoring=options["scoring"],
+                cv=outer_cv,
+                n_jobs=N_JOBS,
+                verbose=options["verbose"],
+                return_estimator=(params["save"] == "save_estimator"),
+                error_score=options["error_score"],
+                return_train_score=True,
+            )
         else:
-            warnings.simplefilter('always', FitFailedWarning)
+            warnings.simplefilter("always", FitFailedWarning)
             with warnings.catch_warnings(record=True) as w:
                 try:
                     rval = cross_validate(
-                        searcher, X, y,
-                        scoring=options['scoring'],
-                        cv=outer_cv, n_jobs=N_JOBS,
-                        verbose=options['verbose'],
-                        return_estimator=(params['save'] == 'save_estimator'),
-                        error_score=options['error_score'],
-                        return_train_score=True)
+                        searcher,
+                        X,
+                        y,
+                        scoring=options["scoring"],
+                        cv=outer_cv,
+                        n_jobs=N_JOBS,
+                        verbose=options["verbose"],
+                        return_estimator=(params["save"] == "save_estimator"),
+                        error_score=options["error_score"],
+                        return_train_score=True,
+                    )
                 except ValueError:
                     pass
                 for warning in w:
                     print(repr(warning.message))
 
-        fitted_searchers = rval.pop('estimator', [])
+        fitted_searchers = rval.pop("estimator", [])
         if fitted_searchers:
             import os
+
             pwd = os.getcwd()
-            save_dir = os.path.join(pwd, 'cv_results_in_folds')
+            save_dir = os.path.join(pwd, "cv_results_in_folds")
             try:
                 os.mkdir(save_dir)
                 for idx, obj in enumerate(fitted_searchers):
-                    target_name = 'cv_results_' + '_' + 'split%d' % idx
+                    target_name = "cv_results_" + "_" + "split%d" % idx
                     target_path = os.path.join(pwd, save_dir, target_name)
-                    cv_results_ = getattr(obj, 'cv_results_', None)
+                    cv_results_ = getattr(obj, "cv_results_", None)
                     if not cv_results_:
                         print("%s is not available" % target_name)
                         continue
                     cv_results_ = pd.DataFrame(cv_results_)
                     cv_results_ = cv_results_[sorted(cv_results_.columns)]
-                    cv_results_.to_csv(target_path, sep='\t', header=True,
-                                       index=False)
+                    cv_results_.to_csv(target_path, sep="\t", header=True, index=False)
             except Exception as e:
                 print(e)
             finally:
@@ -611,18 +654,14 @@
 
         keys = list(rval.keys())
         for k in keys:
-            if k.startswith('test'):
-                rval['mean_' + k] = np.mean(rval[k])
-                rval['std_' + k] = np.std(rval[k])
-            if k.endswith('time'):
+            if k.startswith("test"):
+                rval["mean_" + k] = np.mean(rval[k])
+                rval["std_" + k] = np.std(rval[k])
+            if k.endswith("time"):
                 rval.pop(k)
         rval = pd.DataFrame(rval)
         rval = rval[sorted(rval.columns)]
-        rval.to_csv(path_or_buf=outfile_result, sep='\t', header=True,
-                    index=False)
-
-        return 0
-
+        rval.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
         # deprecate train test split mode
         """searcher = _do_train_test_split_val(
             searcher, X, y, params,
@@ -630,14 +669,15 @@
             error_score=options['error_score'],
             groups=groups,
             outfile=outfile_result)"""
+        return 0
 
     # no outer split
     else:
         searcher.set_params(n_jobs=N_JOBS)
-        if options['error_score'] == 'raise':
+        if options["error_score"] == "raise":
             searcher.fit(X, y, groups=groups)
         else:
-            warnings.simplefilter('always', FitFailedWarning)
+            warnings.simplefilter("always", FitFailedWarning)
             with warnings.catch_warnings(record=True) as w:
                 try:
                     searcher.fit(X, y, groups=groups)
@@ -648,18 +688,19 @@
 
         cv_results = pd.DataFrame(searcher.cv_results_)
         cv_results = cv_results[sorted(cv_results.columns)]
-        cv_results.to_csv(path_or_buf=outfile_result, sep='\t',
-                          header=True, index=False)
+        cv_results.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
 
     memory.clear(warn=False)
 
     # output best estimator, and weights if applicable
     if outfile_object:
-        best_estimator_ = getattr(searcher, 'best_estimator_', None)
+        best_estimator_ = getattr(searcher, "best_estimator_", None)
         if not best_estimator_:
-            warnings.warn("GridSearchCV object has no attribute "
-                          "'best_estimator_', because either it's "
-                          "nested gridsearch or `refit` is False!")
+            warnings.warn(
+                "GridSearchCV object has no attribute "
+                "'best_estimator_', because either it's "
+                "nested gridsearch or `refit` is False!"
+            )
             return
 
         # clean prams
@@ -667,24 +708,22 @@
 
         main_est = get_main_estimator(best_estimator_)
 
-        if hasattr(main_est, 'model_') \
-                and hasattr(main_est, 'save_weights'):
+        if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
             if outfile_weights:
                 main_est.save_weights(outfile_weights)
             del main_est.model_
             del main_est.fit_params
             del main_est.model_class_
             del main_est.validation_data
-            if getattr(main_est, 'data_generator_', None):
+            if getattr(main_est, "data_generator_", None):
                 del main_est.data_generator_
 
-        with open(outfile_object, 'wb') as output_handler:
+        with open(outfile_object, "wb") as output_handler:
             print("Best estimator is saved: %s " % repr(best_estimator_))
-            pickle.dump(best_estimator_, output_handler,
-                        pickle.HIGHEST_PROTOCOL)
+            pickle.dump(best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -700,8 +739,17 @@
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         outfile_weights=args.outfile_weights, groups=args.groups,
-         ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        outfile_weights=args.outfile_weights,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 simple_model_fit.py
--- a/simple_model_fit.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/simple_model_fit.py	Tue Apr 13 17:29:01 2021 +0000
@@ -4,10 +4,11 @@
 import pickle
 
 from galaxy_ml.utils import load_model, read_columns
+from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 
 
-N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+N_JOBS = int(__import__("os").environ.get("GALAXY_SLOTS", 1))
 
 
 # TODO import from galaxy_ml.utils in future versions
@@ -20,33 +21,35 @@
     ------
     Cleaned estimator object
     """
-    ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN',
-                         'ReduceLROnPlateau', 'CSVLogger', 'None')
+    ALLOWED_CALLBACKS = (
+        "EarlyStopping",
+        "TerminateOnNaN",
+        "ReduceLROnPlateau",
+        "CSVLogger",
+        "None",
+    )
 
     estimator_params = estimator.get_params()
 
     for name, p in estimator_params.items():
         # all potential unauthorized file write
-        if name == 'memory' or name.endswith('__memory') \
-                or name.endswith('_path'):
+        if name == "memory" or name.endswith("__memory") or name.endswith("_path"):
             new_p = {name: None}
             estimator.set_params(**new_p)
-        elif n_jobs is not None and (name == 'n_jobs' or
-                                     name.endswith('__n_jobs')):
+        elif n_jobs is not None and (name == 'n_jobs' or name.endswith('__n_jobs')):
             new_p = {name: n_jobs}
             estimator.set_params(**new_p)
-        elif name.endswith('callbacks'):
+        elif name.endswith("callbacks"):
             for cb in p:
-                cb_type = cb['callback_selection']['callback_type']
+                cb_type = cb["callback_selection"]["callback_type"]
                 if cb_type not in ALLOWED_CALLBACKS:
-                    raise ValueError(
-                        "Prohibited callback type: %s!" % cb_type)
+                    raise ValueError("Prohibited callback type: %s!" % cb_type)
 
     return estimator
 
 
 def _get_X_y(params, infile1, infile2):
-    """ read from inputs and output X and y
+    """read from inputs and output X and y
 
     Parameters
     ----------
@@ -61,35 +64,40 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -97,26 +105,23 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2,
+                     c=c,
+                     c_option=column_option,
+                     sep='\t',
+                     header=header,
+                     parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
 
     return X, y
 
 
-def main(inputs, infile_estimator, infile1, infile2, out_object,
-         out_weights=None):
-    """ main
+def main(inputs, infile_estimator, infile1, infile2, out_object, out_weights=None):
+    """main
 
     Parameters
     ----------
@@ -139,38 +144,37 @@
         File path for output of weights
 
     """
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     # load model
-    with open(infile_estimator, 'rb') as est_handler:
+    with open(infile_estimator, "rb") as est_handler:
         estimator = load_model(est_handler)
     estimator = clean_params(estimator, n_jobs=N_JOBS)
 
     X_train, y_train = _get_X_y(params, infile1, infile2)
 
     estimator.fit(X_train, y_train)
-    
+
     main_est = estimator
     if isinstance(main_est, Pipeline):
         main_est = main_est.steps[-1][-1]
-    if hasattr(main_est, 'model_') \
-            and hasattr(main_est, 'save_weights'):
+    if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
         if out_weights:
             main_est.save_weights(out_weights)
         del main_est.model_
         del main_est.fit_params
         del main_est.model_class_
-        del main_est.validation_data
-        if getattr(main_est, 'data_generator_', None):
+        if getattr(main_est, "validation_data", None):
+            del main_est.validation_data
+        if getattr(main_est, "data_generator_", None):
             del main_est.data_generator_
 
-    with open(out_object, 'wb') as output_handler:
-        pickle.dump(estimator, output_handler,
-                    pickle.HIGHEST_PROTOCOL)
+    with open(out_object, "wb") as output_handler:
+        pickle.dump(estimator, output_handler, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-X", "--infile_estimator", dest="infile_estimator")
@@ -180,5 +184,11 @@
     aparser.add_argument("-t", "--out_weights", dest="out_weights")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1,
-         args.infile2, args.out_object, args.out_weights)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.out_object,
+        args.out_weights,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 stacking_ensembles.py
--- a/stacking_ensembles.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/stacking_ensembles.py	Tue Apr 13 17:29:01 2021 +0000
@@ -5,22 +5,17 @@
 import mlxtend.classifier
 import pandas as pd
 import pickle
-import sklearn
 import sys
 import warnings
-from sklearn import ensemble
-
-from galaxy_ml.utils import (load_model, get_cv, get_estimator,
-                             get_search_params)
+from galaxy_ml.utils import load_model, get_cv, get_estimator, get_search_params
 
 
-warnings.filterwarnings('ignore')
+warnings.filterwarnings("ignore")
 
-N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+N_JOBS = int(__import__("os").environ.get("GALAXY_SLOTS", 1))
 
 
-def main(inputs_path, output_obj, base_paths=None, meta_path=None,
-         outfile_params=None):
+def main(inputs_path, output_obj, base_paths=None, meta_path=None, outfile_params=None):
     """
     Parameter
     ---------
@@ -39,87 +34,79 @@
     outfile_params : str
         File path for params output
     """
-    with open(inputs_path, 'r') as param_handler:
+    with open(inputs_path, "r") as param_handler:
         params = json.load(param_handler)
 
-    estimator_type = params['algo_selection']['estimator_type']
+    estimator_type = params["algo_selection"]["estimator_type"]
     # get base estimators
     base_estimators = []
-    for idx, base_file in enumerate(base_paths.split(',')):
-        if base_file and base_file != 'None':
-            with open(base_file, 'rb') as handler:
+    for idx, base_file in enumerate(base_paths.split(",")):
+        if base_file and base_file != "None":
+            with open(base_file, "rb") as handler:
                 model = load_model(handler)
         else:
-            estimator_json = (params['base_est_builder'][idx]
-                              ['estimator_selector'])
+            estimator_json = params["base_est_builder"][idx]["estimator_selector"]
             model = get_estimator(estimator_json)
 
-        if estimator_type.startswith('sklearn'):
+        if estimator_type.startswith("sklearn"):
             named = model.__class__.__name__.lower()
-            named = 'base_%d_%s' % (idx, named)
+            named = "base_%d_%s" % (idx, named)
             base_estimators.append((named, model))
         else:
             base_estimators.append(model)
 
     # get meta estimator, if applicable
-    if estimator_type.startswith('mlxtend'):
+    if estimator_type.startswith("mlxtend"):
         if meta_path:
-            with open(meta_path, 'rb') as f:
+            with open(meta_path, "rb") as f:
                 meta_estimator = load_model(f)
         else:
-            estimator_json = (params['algo_selection']
-                              ['meta_estimator']['estimator_selector'])
+            estimator_json = params["algo_selection"]["meta_estimator"]["estimator_selector"]
             meta_estimator = get_estimator(estimator_json)
 
-    options = params['algo_selection']['options']
+    options = params["algo_selection"]["options"]
 
-    cv_selector = options.pop('cv_selector', None)
+    cv_selector = options.pop("cv_selector", None)
     if cv_selector:
-        splitter, groups = get_cv(cv_selector)
-        options['cv'] = splitter
+        splitter, _groups = get_cv(cv_selector)
+        options["cv"] = splitter
         # set n_jobs
-        options['n_jobs'] = N_JOBS
+        options["n_jobs"] = N_JOBS
 
-    weights = options.pop('weights', None)
+    weights = options.pop("weights", None)
     if weights:
         weights = ast.literal_eval(weights)
         if weights:
-            options['weights'] = weights
+            options["weights"] = weights
 
-    mod_and_name = estimator_type.split('_')
+    mod_and_name = estimator_type.split("_")
     mod = sys.modules[mod_and_name[0]]
     klass = getattr(mod, mod_and_name[1])
 
-    if estimator_type.startswith('sklearn'):
-        options['n_jobs'] = N_JOBS
+    if estimator_type.startswith("sklearn"):
+        options["n_jobs"] = N_JOBS
         ensemble_estimator = klass(base_estimators, **options)
 
     elif mod == mlxtend.classifier:
-        ensemble_estimator = klass(
-            classifiers=base_estimators,
-            meta_classifier=meta_estimator,
-            **options)
+        ensemble_estimator = klass(classifiers=base_estimators, meta_classifier=meta_estimator, **options)
 
     else:
-        ensemble_estimator = klass(
-            regressors=base_estimators,
-            meta_regressor=meta_estimator,
-            **options)
+        ensemble_estimator = klass(regressors=base_estimators, meta_regressor=meta_estimator, **options)
 
     print(ensemble_estimator)
     for base_est in base_estimators:
         print(base_est)
 
-    with open(output_obj, 'wb') as out_handler:
+    with open(output_obj, "wb") as out_handler:
         pickle.dump(ensemble_estimator, out_handler, pickle.HIGHEST_PROTOCOL)
 
-    if params['get_params'] and outfile_params:
+    if params["get_params"] and outfile_params:
         results = get_search_params(ensemble_estimator)
-        df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
-        df.to_csv(outfile_params, sep='\t', index=False)
+        df = pd.DataFrame(results, columns=["", "Parameter", "Value"])
+        df.to_csv(outfile_params, sep="\t", index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-b", "--bases", dest="bases")
     aparser.add_argument("-m", "--meta", dest="meta")
@@ -128,5 +115,10 @@
     aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
     args = aparser.parse_args()
 
-    main(args.inputs, args.outfile, base_paths=args.bases,
-         meta_path=args.meta, outfile_params=args.outfile_params)
+    main(
+        args.inputs,
+        args.outfile,
+        base_paths=args.bases,
+        meta_path=args.meta,
+        outfile_params=args.outfile_params,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_batch_params01.tabular
--- a/test-data/keras_batch_params01.tabular	Thu Oct 01 20:02:43 2020 +0000
+++ b/test-data/keras_batch_params01.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -27,7 +27,7 @@
 @	schedule_decay	schedule_decay: None
 @	seed	seed: None
 @	steps_per_epoch	steps_per_epoch: None
-@	validation_data	validation_data: None
+@	validation_fraction	validation_fraction: 0.1
 @	validation_steps	validation_steps: None
 @	verbose	verbose: 0
 *	data_batch_generator__fasta_path	data_batch_generator__fasta_path: 'to_be_determined'
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_batch_params04.tabular
--- a/test-data/keras_batch_params04.tabular	Thu Oct 01 20:02:43 2020 +0000
+++ b/test-data/keras_batch_params04.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -26,7 +26,7 @@
 @	schedule_decay	schedule_decay: None
 @	seed	seed: None
 @	steps_per_epoch	steps_per_epoch: None
-@	validation_data	validation_data: None
+@	validation_fraction	validation_fraction: 0.1
 @	validation_steps	validation_steps: None
 @	verbose	verbose: 0
 *	layers_0_Dense__class_name	layers_0_Dense__class_name: 'Dense'
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_model01
Binary file test-data/keras_model01 has changed
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_model02
Binary file test-data/keras_model02 has changed
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_model04
Binary file test-data/keras_model04 has changed
diff -r 508ce0649bec -r 0a3f113397b2 test-data/keras_params04.tabular
--- a/test-data/keras_params04.tabular	Thu Oct 01 20:02:43 2020 +0000
+++ b/test-data/keras_params04.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -22,7 +22,7 @@
 @	schedule_decay	schedule_decay: None
 @	seed	seed: 42
 @	steps_per_epoch	steps_per_epoch: None
-@	validation_data	validation_data: None
+@	validation_fraction	validation_fraction: 0.1
 @	validation_steps	validation_steps: None
 @	verbose	verbose: 0
 *	layers_0_Dense__class_name	layers_0_Dense__class_name: 'Dense'
diff -r 508ce0649bec -r 0a3f113397b2 test-data/ohe_in_w_header.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ohe_in_w_header.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -0,0 +1,9 @@
+Label
+0
+1
+2
+3
+3
+2
+1
+0
diff -r 508ce0649bec -r 0a3f113397b2 test-data/ohe_in_wo_header.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ohe_in_wo_header.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -0,0 +1,8 @@
+0
+1
+2
+3
+3
+2
+1
+0
diff -r 508ce0649bec -r 0a3f113397b2 test-data/ohe_out_4.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ohe_out_4.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -0,0 +1,8 @@
+1	0	0	0
+0	1	0	0
+0	0	1	0
+0	0	0	1
+0	0	0	1
+0	0	1	0
+0	1	0	0
+1	0	0	0
diff -r 508ce0649bec -r 0a3f113397b2 test-data/ohe_out_5.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ohe_out_5.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -0,0 +1,8 @@
+1	0	0	0	0
+0	1	0	0	0
+0	0	1	0	0
+0	0	0	1	0
+0	0	0	1	0
+0	0	1	0	0
+0	1	0	0	0
+1	0	0	0	0
diff -r 508ce0649bec -r 0a3f113397b2 test-data/pipeline_params05.tabular
--- a/test-data/pipeline_params05.tabular	Thu Oct 01 20:02:43 2020 +0000
+++ b/test-data/pipeline_params05.tabular	Tue Apr 13 17:29:01 2021 +0000
@@ -13,6 +13,6 @@
 *	n_jobs	n_jobs: 1
 @	oob_score	oob_score: False
 @	random_state	random_state: 42
-*	verbose	verbose: 0
+@	verbose	verbose: 0
 @	warm_start	warm_start: False
 	Note:	@, params eligible for search in searchcv tool.
diff -r 508ce0649bec -r 0a3f113397b2 test-data/pipeline_params18
--- a/test-data/pipeline_params18	Thu Oct 01 20:02:43 2020 +0000
+++ b/test-data/pipeline_params18	Tue Apr 13 17:29:01 2021 +0000
@@ -47,7 +47,7 @@
                                                            output_distribution='uniform',
                                                            random_state=10,
                                                            subsample=100000))"
-*	verbose	verbose: False
+@	verbose	verbose: False
 @	powertransformer__copy	powertransformer__copy: True
 @	powertransformer__method	powertransformer__method: 'yeo-johnson'
 @	powertransformer__standardize	powertransformer__standardize: True
@@ -75,7 +75,7 @@
 *	transformedtargetregressor__regressor__n_jobs	transformedtargetregressor__regressor__n_jobs: 1
 @	transformedtargetregressor__regressor__oob_score	transformedtargetregressor__regressor__oob_score: False
 @	transformedtargetregressor__regressor__random_state	transformedtargetregressor__regressor__random_state: 10
-*	transformedtargetregressor__regressor__verbose	transformedtargetregressor__regressor__verbose: 0
+@	transformedtargetregressor__regressor__verbose	transformedtargetregressor__regressor__verbose: 0
 @	transformedtargetregressor__regressor__warm_start	transformedtargetregressor__regressor__warm_start: False
 @	transformedtargetregressor__transformer	"transformedtargetregressor__transformer: QuantileTransformer(copy=True, ignore_implicit_zeros=False, n_quantiles=1000,
                     output_distribution='uniform', random_state=10,
diff -r 508ce0649bec -r 0a3f113397b2 test-data/train_test_eval_model01
Binary file test-data/train_test_eval_model01 has changed
diff -r 508ce0649bec -r 0a3f113397b2 test-data/train_test_eval_weights01.h5
Binary file test-data/train_test_eval_weights01.h5 has changed
diff -r 508ce0649bec -r 0a3f113397b2 test-data/train_test_eval_weights02.h5
Binary file test-data/train_test_eval_weights02.h5 has changed
diff -r 508ce0649bec -r 0a3f113397b2 to_categorical.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/to_categorical.py	Tue Apr 13 17:29:01 2021 +0000
@@ -0,0 +1,50 @@
+import argparse
+import json
+import warnings
+
+import numpy as np
+import pandas as pd
+from keras.utils import to_categorical
+
+
+def main(inputs, infile, outfile, num_classes=None):
+    """
+    Parameter
+    ---------
+    inputs : str
+        File path to galaxy tool parameter
+
+    infile : str
+        File path of input vector
+
+    outfile : str
+        File path to output matrix
+
+    num_classes : int or None
+        Total number of classes. If None, this would be inferred as the (largest number in y) + 1
+
+    """
+    warnings.simplefilter("ignore")
+
+    with open(inputs, "r") as param_handler:
+        params = json.load(param_handler)
+
+    input_header = params["header0"]
+    header = "infer" if input_header else None
+
+    input_vector = pd.read_csv(infile, sep="\t", header=header)
+
+    output_matrix = to_categorical(input_vector, num_classes=num_classes)
+
+    np.savetxt(outfile, output_matrix, fmt="%d", delimiter="\t")
+
+
+if __name__ == "__main__":
+    aparser = argparse.ArgumentParser()
+    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
+    aparser.add_argument("-y", "--infile", dest="infile")
+    aparser.add_argument("-n", "--num_classes", dest="num_classes", type=int, default=None)
+    aparser.add_argument("-o", "--outfile", dest="outfile")
+    args = aparser.parse_args()
+
+    main(args.inputs, args.infile, args.outfile, args.num_classes)
diff -r 508ce0649bec -r 0a3f113397b2 train_test_eval.py
--- a/train_test_eval.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/train_test_eval.py	Tue Apr 13 17:29:01 2021 +0000
@@ -1,59 +1,66 @@
 import argparse
-import joblib
 import json
-import numpy as np
 import os
-import pandas as pd
 import pickle
 import warnings
+
 from itertools import chain
+
+import joblib
+import numpy as np
+import pandas as pd
+from galaxy_ml.model_validations import train_test_split
+from galaxy_ml.utils import (
+    get_module,
+    get_scoring,
+    load_model,
+    read_columns,
+    SafeEval,
+    try_get_attr,
+)
 from scipy.io import mmread
-from sklearn.base import clone
-from sklearn import (cluster, compose, decomposition, ensemble,
-                     feature_extraction, feature_selection,
-                     gaussian_process, kernel_approximation, metrics,
-                     model_selection, naive_bayes, neighbors,
-                     pipeline, preprocessing, svm, linear_model,
-                     tree, discriminant_analysis)
-from sklearn.exceptions import FitFailedWarning
+from sklearn import pipeline
 from sklearn.metrics.scorer import _check_multimetric_scoring
-from sklearn.model_selection._validation import _score, cross_validate
+from sklearn.model_selection._validation import _score
 from sklearn.model_selection import _search, _validation
+from sklearn.model_selection._validation import _score
 from sklearn.utils import indexable, safe_indexing
 
-from galaxy_ml.model_validations import train_test_split
-from galaxy_ml.utils import (SafeEval, get_scoring, load_model,
-                             read_columns, try_get_attr, get_module)
 
+_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
+setattr(_search, "_fit_and_score", _fit_and_score)
+setattr(_validation, "_fit_and_score", _fit_and_score)
 
-_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
-setattr(_search, '_fit_and_score', _fit_and_score)
-setattr(_validation, '_fit_and_score', _fit_and_score)
-
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
-CACHE_DIR = os.path.join(os.getcwd(), 'cached')
+N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
+CACHE_DIR = os.path.join(os.getcwd(), "cached")
 del os
-NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
-                  'nthread', 'callbacks')
-ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
-                     'CSVLogger', 'None')
+NON_SEARCHABLE = ("n_jobs", "pre_dispatch", "memory", "_path", "nthread", "callbacks")
+ALLOWED_CALLBACKS = (
+    "EarlyStopping",
+    "TerminateOnNaN",
+    "ReduceLROnPlateau",
+    "CSVLogger",
+    "None",
+)
 
 
 def _eval_swap_params(params_builder):
     swap_params = {}
 
-    for p in params_builder['param_set']:
-        swap_value = p['sp_value'].strip()
-        if swap_value == '':
+    for p in params_builder["param_set"]:
+        swap_value = p["sp_value"].strip()
+        if swap_value == "":
             continue
 
-        param_name = p['sp_name']
+        param_name = p["sp_name"]
         if param_name.lower().endswith(NON_SEARCHABLE):
-            warnings.warn("Warning: `%s` is not eligible for search and was "
-                          "omitted!" % param_name)
+            warnings.warn(
+                "Warning: `%s` is not eligible for search and was "
+                "omitted!" % param_name
+            )
             continue
 
-        if not swap_value.startswith(':'):
+        if not swap_value.startswith(":"):
             safe_eval = SafeEval(load_scipy=True, load_numpy=True)
             ev = safe_eval(swap_value)
         else:
@@ -80,23 +87,24 @@
         else:
             new_arrays.append(arr)
 
-    if kwargs['shuffle'] == 'None':
-        kwargs['shuffle'] = None
+    if kwargs["shuffle"] == "None":
+        kwargs["shuffle"] = None
 
-    group_names = kwargs.pop('group_names', None)
+    group_names = kwargs.pop("group_names", None)
 
     if group_names is not None and group_names.strip():
-        group_names = [name.strip() for name in
-                       group_names.split(',')]
+        group_names = [name.strip() for name in group_names.split(",")]
         new_arrays = indexable(*new_arrays)
-        groups = kwargs['labels']
+        groups = kwargs["labels"]
         n_samples = new_arrays[0].shape[0]
         index_arr = np.arange(n_samples)
         test = index_arr[np.isin(groups, group_names)]
         train = index_arr[~np.isin(groups, group_names)]
-        rval = list(chain.from_iterable(
-            (safe_indexing(a, train),
-             safe_indexing(a, test)) for a in new_arrays))
+        rval = list(
+            chain.from_iterable(
+                (safe_indexing(a, train), safe_indexing(a, test)) for a in new_arrays
+            )
+        )
     else:
         rval = train_test_split(*new_arrays, **kwargs)
 
@@ -106,11 +114,20 @@
     return rval
 
 
-def main(inputs, infile_estimator, infile1, infile2,
-         outfile_result, outfile_object=None,
-         outfile_weights=None, groups=None,
-         ref_seq=None, intervals=None, targets=None,
-         fasta_path=None):
+def main(
+    inputs,
+    infile_estimator,
+    infile1,
+    infile2,
+    outfile_result,
+    outfile_object=None,
+    outfile_weights=None,
+    groups=None,
+    ref_seq=None,
+    intervals=None,
+    targets=None,
+    fasta_path=None,
+):
     """
     Parameter
     ---------
@@ -150,17 +167,17 @@
     fasta_path : str
         File path to dataset containing fasta file
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
     #  load estimator
-    with open(infile_estimator, 'rb') as estimator_handler:
+    with open(infile_estimator, "rb") as estimator_handler:
         estimator = load_model(estimator_handler)
 
     # swap hyperparameter
-    swapping = params['experiment_schemes']['hyperparams_swapping']
+    swapping = params["experiment_schemes"]["hyperparams_swapping"]
     swap_params = _eval_swap_params(swapping)
     estimator.set_params(**swap_params)
 
@@ -169,38 +186,41 @@
     # store read dataframe object
     loaded_df = {}
 
-    input_type = params['input_options']['selected_input']
+    input_type = params["input_options"]["selected_input"]
     # tabular input
-    if input_type == 'tabular':
-        header = 'infer' if params['input_options']['header1'] else None
-        column_option = (params['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = params['input_options']['column_selector_options_1']['col1']
+    if input_type == "tabular":
+        header = "infer" if params["input_options"]["header1"] else None
+        column_option = params["input_options"]["column_selector_options_1"][
+            "selected_column_selector_option"
+        ]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["input_options"]["column_selector_options_1"]["col1"]
         else:
             c = None
 
         df_key = infile1 + repr(header)
-        df = pd.read_csv(infile1, sep='\t', header=header,
-                         parse_dates=True)
+        df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = df
 
         X = read_columns(df, c=c, c_option=column_option).astype(float)
     # sparse input
-    elif input_type == 'sparse':
-        X = mmread(open(infile1, 'r'))
+    elif input_type == "sparse":
+        X = mmread(open(infile1, "r"))
 
     # fasta_file input
-    elif input_type == 'seq_fasta':
-        pyfaidx = get_module('pyfaidx')
+    elif input_type == "seq_fasta":
+        pyfaidx = get_module("pyfaidx")
         sequences = pyfaidx.Fasta(fasta_path)
         n_seqs = len(sequences.keys())
         X = np.arange(n_seqs)[:, np.newaxis]
         for param in estimator_params.keys():
-            if param.endswith('fasta_path'):
-                estimator.set_params(
-                    **{param: fasta_path})
+            if param.endswith("fasta_path"):
+                estimator.set_params(**{param: fasta_path})
                 break
         else:
             raise ValueError(
@@ -209,25 +229,31 @@
                 "KerasGBatchClassifier with "
                 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
                 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
-                "in pipeline!")
+                "in pipeline!"
+            )
 
-    elif input_type == 'refseq_and_interval':
+    elif input_type == "refseq_and_interval":
         path_params = {
-            'data_batch_generator__ref_genome_path': ref_seq,
-            'data_batch_generator__intervals_path': intervals,
-            'data_batch_generator__target_path': targets
+            "data_batch_generator__ref_genome_path": ref_seq,
+            "data_batch_generator__intervals_path": intervals,
+            "data_batch_generator__target_path": targets,
         }
         estimator.set_params(**path_params)
         n_intervals = sum(1 for line in open(intervals))
         X = np.arange(n_intervals)[:, np.newaxis]
 
     # Get target y
-    header = 'infer' if params['input_options']['header2'] else None
-    column_option = (params['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_2']['col2']
+    header = "infer" if params["input_options"]["header2"] else None
+    column_option = params["input_options"]["column_selector_options_2"][
+        "selected_column_selector_option2"
+    ]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        c = params["input_options"]["column_selector_options_2"]["col2"]
     else:
         c = None
 
@@ -235,37 +261,39 @@
     if df_key in loaded_df:
         infile2 = loaded_df[df_key]
     else:
-        infile2 = pd.read_csv(infile2, sep='\t',
-                              header=header, parse_dates=True)
+        infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
         loaded_df[df_key] = infile2
 
-    y = read_columns(
-            infile2,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
+    y = read_columns(infile2,
+                     c=c,
+                     c_option=column_option,
+                     sep='\t',
+                     header=header,
+                     parse_dates=True)
     if len(y.shape) == 2 and y.shape[1] == 1:
         y = y.ravel()
-    if input_type == 'refseq_and_interval':
-        estimator.set_params(
-            data_batch_generator__features=y.ravel().tolist())
+    if input_type == "refseq_and_interval":
+        estimator.set_params(data_batch_generator__features=y.ravel().tolist())
         y = None
     # end y
 
     # load groups
     if groups:
-        groups_selector = (params['experiment_schemes']['test_split']
-                                 ['split_algos']).pop('groups_selector')
+        groups_selector = (
+            params["experiment_schemes"]["test_split"]["split_algos"]
+        ).pop("groups_selector")
 
-        header = 'infer' if groups_selector['header_g'] else None
-        column_option = \
-            (groups_selector['column_selector_options_g']
-                            ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = groups_selector['column_selector_options_g']['col_g']
+        header = "infer" if groups_selector["header_g"] else None
+        column_option = groups_selector["column_selector_options_g"][
+            "selected_column_selector_option_g"
+        ]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = groups_selector["column_selector_options_g"]["col_g"]
         else:
             c = None
 
@@ -273,13 +301,12 @@
         if df_key in loaded_df:
             groups = loaded_df[df_key]
 
-        groups = read_columns(
-                groups,
-                c=c,
-                c_option=column_option,
-                sep='\t',
-                header=header,
-                parse_dates=True)
+        groups = read_columns(groups,
+                              c=c,
+                              c_option=column_option,
+                              sep='\t',
+                              header=header,
+                              parse_dates=True)
         groups = groups.ravel()
 
     # del loaded_df
@@ -288,15 +315,15 @@
     # handle memory
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
     # cache iraps_core fits could increase search speed significantly
-    if estimator.__class__.__name__ == 'IRAPSClassifier':
+    if estimator.__class__.__name__ == "IRAPSClassifier":
         estimator.set_params(memory=memory)
     else:
         # For iraps buried in pipeline
         new_params = {}
         for p, v in estimator_params.items():
-            if p.endswith('memory'):
+            if p.endswith("memory"):
                 # for case of `__irapsclassifier__memory`
-                if len(p) > 8 and p[:-8].endswith('irapsclassifier'):
+                if len(p) > 8 and p[:-8].endswith("irapsclassifier"):
                     # cache iraps_core fits could increase search
                     # speed significantly
                     new_params[p] = memory
@@ -305,88 +332,98 @@
                 elif v:
                     new_params[p] = None
             # handle n_jobs
-            elif p.endswith('n_jobs'):
+            elif p.endswith("n_jobs"):
                 # For now, 1 CPU is suggested for iprasclassifier
-                if len(p) > 8 and p[:-8].endswith('irapsclassifier'):
+                if len(p) > 8 and p[:-8].endswith("irapsclassifier"):
                     new_params[p] = 1
                 else:
                     new_params[p] = N_JOBS
             # for security reason, types of callback are limited
-            elif p.endswith('callbacks'):
+            elif p.endswith("callbacks"):
                 for cb in v:
-                    cb_type = cb['callback_selection']['callback_type']
+                    cb_type = cb["callback_selection"]["callback_type"]
                     if cb_type not in ALLOWED_CALLBACKS:
-                        raise ValueError(
-                            "Prohibited callback type: %s!" % cb_type)
+                        raise ValueError("Prohibited callback type: %s!" % cb_type)
 
         estimator.set_params(**new_params)
 
     # handle scorer, convert to scorer dict
-    scoring = params['experiment_schemes']['metrics']['scoring']
+    # Check if scoring is specified
+    scoring = params["experiment_schemes"]["metrics"].get("scoring", None)
+    if scoring is not None:
+        # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
+        # Check if secondary_scoring is specified
+        secondary_scoring = scoring.get("secondary_scoring", None)
+        if secondary_scoring is not None:
+            # If secondary_scoring is specified, convert the list into a comma-separated string
+            scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
     scorer = get_scoring(scoring)
     scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
 
     # handle test (first) split
-    test_split_options = (params['experiment_schemes']
-                                ['test_split']['split_algos'])
+    test_split_options = params["experiment_schemes"]["test_split"]["split_algos"]
 
-    if test_split_options['shuffle'] == 'group':
-        test_split_options['labels'] = groups
-    if test_split_options['shuffle'] == 'stratified':
+    if test_split_options["shuffle"] == "group":
+        test_split_options["labels"] = groups
+    if test_split_options["shuffle"] == "stratified":
         if y is not None:
-            test_split_options['labels'] = y
+            test_split_options["labels"] = y
         else:
-            raise ValueError("Stratified shuffle split is not "
-                             "applicable on empty target values!")
+            raise ValueError(
+                "Stratified shuffle split is not " "applicable on empty target values!"
+            )
 
-    X_train, X_test, y_train, y_test, groups_train, groups_test = \
-        train_test_split_none(X, y, groups, **test_split_options)
+    X_train, X_test, y_train, y_test, groups_train, _groups_test = train_test_split_none(
+        X, y, groups, **test_split_options
+    )
 
-    exp_scheme = params['experiment_schemes']['selected_exp_scheme']
+    exp_scheme = params["experiment_schemes"]["selected_exp_scheme"]
 
     # handle validation (second) split
-    if exp_scheme == 'train_val_test':
-        val_split_options = (params['experiment_schemes']
-                                   ['val_split']['split_algos'])
+    if exp_scheme == "train_val_test":
+        val_split_options = params["experiment_schemes"]["val_split"]["split_algos"]
 
-        if val_split_options['shuffle'] == 'group':
-            val_split_options['labels'] = groups_train
-        if val_split_options['shuffle'] == 'stratified':
+        if val_split_options["shuffle"] == "group":
+            val_split_options["labels"] = groups_train
+        if val_split_options["shuffle"] == "stratified":
             if y_train is not None:
-                val_split_options['labels'] = y_train
+                val_split_options["labels"] = y_train
             else:
-                raise ValueError("Stratified shuffle split is not "
-                                 "applicable on empty target values!")
+                raise ValueError(
+                    "Stratified shuffle split is not "
+                    "applicable on empty target values!"
+                )
 
-        X_train, X_val, y_train, y_val, groups_train, groups_val = \
-            train_test_split_none(X_train, y_train, groups_train,
-                                  **val_split_options)
+        (
+            X_train,
+            X_val,
+            y_train,
+            y_val,
+            groups_train,
+            _groups_val,
+        ) = train_test_split_none(X_train, y_train, groups_train, **val_split_options)
 
     # train and eval
-    if hasattr(estimator, 'validation_data'):
-        if exp_scheme == 'train_val_test':
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_val, y_val))
+    if hasattr(estimator, "validation_data"):
+        if exp_scheme == "train_val_test":
+            estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
         else:
-            estimator.fit(X_train, y_train,
-                          validation_data=(X_test, y_test))
+            estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
     else:
         estimator.fit(X_train, y_train)
 
-    if hasattr(estimator, 'evaluate'):
-        scores = estimator.evaluate(X_test, y_test=y_test,
-                                    scorer=scorer,
-                                    is_multimetric=True)
+    if hasattr(estimator, "evaluate"):
+        scores = estimator.evaluate(
+            X_test, y_test=y_test, scorer=scorer, is_multimetric=True
+        )
     else:
-        scores = _score(estimator, X_test, y_test, scorer,
-                        is_multimetric=True)
+        scores = _score(estimator, X_test, y_test, scorer, is_multimetric=True)
     # handle output
     for name, score in scores.items():
         scores[name] = [score]
     df = pd.DataFrame(scores)
     df = df[sorted(df.columns)]
-    df.to_csv(path_or_buf=outfile_result, sep='\t',
-              header=True, index=False)
+    df.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
 
     memory.clear(warn=False)
 
@@ -395,23 +432,25 @@
         if isinstance(estimator, pipeline.Pipeline):
             main_est = estimator.steps[-1][-1]
 
-        if hasattr(main_est, 'model_') \
-                and hasattr(main_est, 'save_weights'):
+        if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
             if outfile_weights:
                 main_est.save_weights(outfile_weights)
-            del main_est.model_
-            del main_est.fit_params
-            del main_est.model_class_
-            del main_est.validation_data
-            if getattr(main_est, 'data_generator_', None):
+            if getattr(main_est, "model_", None):
+                del main_est.model_
+            if getattr(main_est, "fit_params", None):
+                del main_est.fit_params
+            if getattr(main_est, "model_class_", None):
+                del main_est.model_class_
+            if getattr(main_est, "validation_data", None):
+                del main_est.validation_data
+            if getattr(main_est, "data_generator_", None):
                 del main_est.data_generator_
 
-        with open(outfile_object, 'wb') as output_handler:
-            pickle.dump(estimator, output_handler,
-                        pickle.HIGHEST_PROTOCOL)
+        with open(outfile_object, "wb") as output_handler:
+            pickle.dump(estimator, output_handler, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--estimator", dest="infile_estimator")
@@ -427,8 +466,17 @@
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
-         args.outfile_result, outfile_object=args.outfile_object,
-         outfile_weights=args.outfile_weights, groups=args.groups,
-         ref_seq=args.ref_seq, intervals=args.intervals,
-         targets=args.targets, fasta_path=args.fasta_path)
+    main(
+        args.inputs,
+        args.infile_estimator,
+        args.infile1,
+        args.infile2,
+        args.outfile_result,
+        outfile_object=args.outfile_object,
+        outfile_weights=args.outfile_weights,
+        groups=args.groups,
+        ref_seq=args.ref_seq,
+        intervals=args.intervals,
+        targets=args.targets,
+        fasta_path=args.fasta_path,
+    )
diff -r 508ce0649bec -r 0a3f113397b2 train_test_split.py
--- a/train_test_split.py	Thu Oct 01 20:02:43 2020 +0000
+++ b/train_test_split.py	Tue Apr 13 17:29:01 2021 +0000
@@ -7,9 +7,8 @@
 from galaxy_ml.utils import get_cv, read_columns
 
 
-def _get_single_cv_split(params, array, infile_labels=None,
-                         infile_groups=None):
-    """ output (train, test) subset from a cv splitter
+def _get_single_cv_split(params, array, infile_labels=None, infile_groups=None):
+    """output (train, test) subset from a cv splitter
 
     Parameters
     ----------
@@ -25,45 +24,50 @@
     y = None
     groups = None
 
-    nth_split = params['mode_selection']['nth_split']
+    nth_split = params["mode_selection"]["nth_split"]
 
     # read groups
     if infile_groups:
-        header = 'infer' if (params['mode_selection']['cv_selector']
-                             ['groups_selector']['header_g']) else None
-        column_option = (params['mode_selection']['cv_selector']
-                         ['groups_selector']['column_selector_options_g']
-                         ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = (params['mode_selection']['cv_selector']['groups_selector']
-                 ['column_selector_options_g']['col_g'])
+        header = "infer" if (params["mode_selection"]["cv_selector"]["groups_selector"]["header_g"]) else None
+        column_option = params["mode_selection"]["cv_selector"]["groups_selector"]["column_selector_options_g"][
+            "selected_column_selector_option_g"
+        ]
+        if column_option in [
+            "by_index_number",
+            "all_but_by_index_number",
+            "by_header_name",
+            "all_but_by_header_name",
+        ]:
+            c = params["mode_selection"]["cv_selector"]["groups_selector"]["column_selector_options_g"]["col_g"]
         else:
             c = None
 
-        groups = read_columns(infile_groups, c=c, c_option=column_option,
-                              sep='\t', header=header, parse_dates=True)
+        groups = read_columns(
+            infile_groups,
+            c=c,
+            c_option=column_option,
+            sep="\t",
+            header=header,
+            parse_dates=True,
+        )
         groups = groups.ravel()
 
-        params['mode_selection']['cv_selector']['groups_selector'] = groups
+        params["mode_selection"]["cv_selector"]["groups_selector"] = groups
 
     # read labels
     if infile_labels:
-        target_input = (params['mode_selection']
-                        ['cv_selector'].pop('target_input'))
-        header = 'infer' if target_input['header1'] else None
-        col_index = target_input['col'][0] - 1
-        df = pd.read_csv(infile_labels, sep='\t', header=header,
-                         parse_dates=True)
+        target_input = params["mode_selection"]["cv_selector"].pop("target_input")
+        header = "infer" if target_input["header1"] else None
+        col_index = target_input["col"][0] - 1
+        df = pd.read_csv(infile_labels, sep="\t", header=header, parse_dates=True)
         y = df.iloc[:, col_index].values
 
     # construct the cv splitter object
-    splitter, groups = get_cv(params['mode_selection']['cv_selector'])
+    splitter, groups = get_cv(params["mode_selection"]["cv_selector"])
 
     total_n_splits = splitter.get_n_splits(array.values, y=y, groups=groups)
     if nth_split > total_n_splits:
-        raise ValueError("Total number of splits is {}, but got `nth_split` "
-                         "= {}".format(total_n_splits, nth_split))
+        raise ValueError("Total number of splits is {}, but got `nth_split` " "= {}".format(total_n_splits, nth_split))
 
     i = 1
     for train_index, test_index in splitter.split(array.values, y=y, groups=groups):
@@ -79,8 +83,14 @@
     return train, test
 
 
-def main(inputs, infile_array, outfile_train, outfile_test,
-         infile_labels=None, infile_groups=None):
+def main(
+    inputs,
+    infile_array,
+    outfile_train,
+    outfile_test,
+    infile_labels=None,
+    infile_groups=None,
+):
     """
     Parameter
     ---------
@@ -102,45 +112,41 @@
     outfile_test : str
         File path to dataset containing test split
     """
-    warnings.simplefilter('ignore')
+    warnings.simplefilter("ignore")
 
-    with open(inputs, 'r') as param_handler:
+    with open(inputs, "r") as param_handler:
         params = json.load(param_handler)
 
-    input_header = params['header0']
-    header = 'infer' if input_header else None
-    array = pd.read_csv(infile_array, sep='\t', header=header,
-                        parse_dates=True)
+    input_header = params["header0"]
+    header = "infer" if input_header else None
+    array = pd.read_csv(infile_array, sep="\t", header=header, parse_dates=True)
 
     # train test split
-    if params['mode_selection']['selected_mode'] == 'train_test_split':
-        options = params['mode_selection']['options']
-        shuffle_selection = options.pop('shuffle_selection')
-        options['shuffle'] = shuffle_selection['shuffle']
+    if params["mode_selection"]["selected_mode"] == "train_test_split":
+        options = params["mode_selection"]["options"]
+        shuffle_selection = options.pop("shuffle_selection")
+        options["shuffle"] = shuffle_selection["shuffle"]
         if infile_labels:
-            header = 'infer' if shuffle_selection['header1'] else None
-            col_index = shuffle_selection['col'][0] - 1
-            df = pd.read_csv(infile_labels, sep='\t', header=header,
-                             parse_dates=True)
+            header = "infer" if shuffle_selection["header1"] else None
+            col_index = shuffle_selection["col"][0] - 1
+            df = pd.read_csv(infile_labels, sep="\t", header=header, parse_dates=True)
             labels = df.iloc[:, col_index].values
-            options['labels'] = labels
+            options["labels"] = labels
 
         train, test = train_test_split(array, **options)
 
     # cv splitter
     else:
-        train, test = _get_single_cv_split(params, array,
-                                           infile_labels=infile_labels,
-                                           infile_groups=infile_groups)
+        train, test = _get_single_cv_split(params, array, infile_labels=infile_labels, infile_groups=infile_groups)
 
     print("Input shape: %s" % repr(array.shape))
     print("Train shape: %s" % repr(train.shape))
     print("Test shape: %s" % repr(test.shape))
-    train.to_csv(outfile_train, sep='\t', header=input_header, index=False)
-    test.to_csv(outfile_test, sep='\t', header=input_header, index=False)
+    train.to_csv(outfile_train, sep="\t", header=input_header, index=False)
+    test.to_csv(outfile_test, sep="\t", header=input_header, index=False)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-X", "--infile_array", dest="infile_array")
@@ -150,5 +156,11 @@
     aparser.add_argument("-t", "--outfile_test", dest="outfile_test")
     args = aparser.parse_args()
 
-    main(args.inputs, args.infile_array, args.outfile_train,
-         args.outfile_test, args.infile_labels, args.infile_groups)
+    main(
+        args.inputs,
+        args.infile_array,
+        args.outfile_train,
+        args.outfile_test,
+        args.infile_labels,
+        args.infile_groups,
+    )