Mercurial > repos > bgruening > sklearn_estimator_attributes
view estimator_attributes.xml @ 23:30d21a6041f6 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57a0433defa3cbc37ab34fbb0ebcfaeb680db8d5
author | bgruening |
---|---|
date | Sun, 05 Nov 2023 15:10:07 +0000 |
parents | a01fa4e8fe4f |
children |
line wrap: on
line source
<tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="@PROFILE@">
    <description>get important attributes from an estimator or scikit object</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <version_command>echo "@VERSION@"</version_command>
    <command>
        <![CDATA[
        python '$main_script'
        ]]>
    </command>
    <configfiles>
        <!--
            Cheetah-templated Python script. Galaxy substitutes $infile_object,
            $attribute_type and $outfile before the script is run; lines starting
            with "##" are Cheetah comments and never reach the Python interpreter.
        -->
        <configfile name="main_script">
            <![CDATA[
import json
import pandas
import skrebate
import sys
import warnings
import xgboost
from mlxtend import regressor, classifier
from sklearn import (
    cluster, compose, decomposition, ensemble, feature_extraction,
    feature_selection, gaussian_process, kernel_approximation, metrics,
    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
    svm, linear_model, tree, discriminant_analysis)
from imblearn.pipeline import Pipeline as imbPipeline
from sklearn.pipeline import Pipeline
from galaxy_ml.model_persist import load_model_from_h5, dump_model_to_h5
from galaxy_ml.utils import get_search_params


warnings.simplefilter('ignore')

infile_object = '$infile_object'
attribute = '$attribute_type'

est_obj = load_model_from_h5(infile_object)

if attribute == 'get_params':
    ## get_params()
    ## Tabular dump of the rows returned by get_search_params.
    results = get_search_params(est_obj)
    df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value'])
    df.to_csv('$outfile', sep='\t', index=False)
elif attribute == 'final_estimator':
    ## Last step of a pipeline; each entry of steps is indexed from its end,
    ## so [-1][-1] is the estimator object of the final step.
    res = est_obj.steps[-1][-1]
    print(repr(res))
    dump_model_to_h5(res, '$outfile')
elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']:
    ## These attributes are written back out as a model file.
    res = getattr(est_obj, attribute)
    print(repr(res))
    dump_model_to_h5(res, '$outfile')
elif attribute in ['oob_score_', 'best_score_', 'n_features_in']:
    ## Single values are written as a one-row, one-column table.
    ## scikit-learn names the fitted attribute n_features_in_ (trailing
    ## underscore). The select value is kept without it so that existing
    ## workflows and the output column header stay unchanged.
    attr_name = 'n_features_in_' if attribute == 'n_features_in' else attribute
    res = getattr(est_obj, attr_name)
    res = pandas.DataFrame([res], columns=[attribute])
    res.to_csv('$outfile', sep='\t', index=False)
elif attribute in ['best_params_', 'named_steps']:
    ## Written as the plain-text repr of the attribute.
    res = getattr(est_obj, attribute)
    with open('$outfile', 'w') as f:
        f.write(repr(res))
elif attribute == 'cv_results_':
    ## Columns are sorted by name before writing.
    res = pandas.DataFrame(est_obj.cv_results_)
    res = res[sorted(res.columns)]
    res.to_csv('$outfile', sep='\t', index=False)
else:
    ## Remaining attributes are read via ndim and shape below, i.e. they are
    ## assumed to be array-like objects exposing both (to be confirmed per
    ## estimator).
    if attribute == 'get_signature':
        res = est_obj.get_signature()
    else:
        res = getattr(est_obj, attribute)
    if res.ndim == 1 or res.shape[-1] == 1:
        columns = [attribute]
    else:
        ## One column per entry along the last axis: <attribute>_<index>.
        columns = [attribute + '_' + str(i) for i in range(res.shape[-1])]
    res = pandas.DataFrame(res, columns=columns)
    res.to_csv('$outfile', sep='\t', index=False)

            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="infile_object" type="data" format="h5mlm" label="Choose the dataset containing estimator/pipeline object" />
        <!-- Option values are consumed verbatim by the script above as the attribute / branch selector. -->
        <param name="attribute_type" type="select" label="Select an attribute retrieval type">
            <option value="get_params" selected="true">Estimator - get_params()</option>
            <option value="feature_importances_" >Fitted estimator - feature_importances_ </option>
            <option value="coef_">Fitted estimator - coef_ </option>
            <option value="train_score_">Fitted estimator - train_score_ </option>
            <option value="oob_score_">Fitted estimator - oob_score_ </option>
            <option value="init_">Fitted estimator - init_ </option>
            <option value="classifier_">Fitted BinarizeTargetClassifier - classifier_</option>
            <option value="regressor_">Fitted BinarizeTargetRegressor - regressor_</option>
            <option value="get_signature">Fitted IRAPSClassifier - get_signature</option>
            <option value="named_steps">Pipeline - named_steps </option>
            <option value="final_estimator">Pipeline - final_estimator </option>
            <option value="cv_results_">SearchCV - cv_results_ </option>
            <option value="best_estimator_">SearchCV - best_estimator_ </option>
            <option value="best_score_">SearchCV - best_score_ </option>
            <option value="best_params_">SearchCV - best_params_ </option>
            <option value="scores_">Feature_selection - scores_ </option>
            <option value="pvalues_">Feature_selection - pvalues_ </option>
            <option value="ranking_">Feature_selection - ranking_ </option>
            <option value="n_features_in">Feature_selection - n_features_in </option>
            <option value="grid_scores_">Feature_selection - grid_scores_ </option>
        </param>
    </inputs>
    <outputs>
        <!-- Default output is tabular; the format switches to txt or h5mlm for the attribute types listed below. -->
        <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}">
            <change_format>
                <when input="attribute_type" value="named_steps" format="txt" />
                <when input="attribute_type" value="best_params_" format="txt" />
                <when input="attribute_type" value="final_estimator" format="h5mlm" />
                <when input="attribute_type" value="best_estimator_" format="h5mlm" />
                <when input="attribute_type" value="init_" format="h5mlm" />
                <when input="attribute_type" value="classifier_" format="h5mlm" />
                <when input="attribute_type" value="regressor_" format="h5mlm" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="best_score_" />
            <output name="outfile" file="best_score_.tabular" />
        </test>
        <test>
            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="best_params_" />
            <output name="outfile" file="best_params_.txt" />
        </test>
        <test>
            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="best_estimator_" />
            <output name="outfile" file="best_estimator_.h5mlm" compare="sim_size" delta="10" />
        </test>
        <test>
            <param name="infile_object" value="searchCV01" ftype="h5mlm" />
            <param name="attribute_type" value="final_estimator" />
            <output name="outfile" file="final_estimator.h5mlm" compare="sim_size" delta="10" />
        </test>
        <test>
            <param name="infile_object" value="searchCV01" ftype="h5mlm" />
            <param name="attribute_type" value="named_steps" />
            <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" />
        </test>
        <test>
            <param name="infile_object" value="best_estimator_.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="feature_importances_" />
            <output name="outfile" file="feature_importances_.tabular" />
        </test>
        <test>
            <param name="infile_object" value="RFE.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="ranking_" />
            <output name="outfile" file="ranking_.tabular" />
        </test>
        <test>
            <param name="infile_object" value="LinearRegression01.h5mlm" ftype="h5mlm" />
            <param name="attribute_type" value="get_params" />
            <output name="outfile" file="get_params.tabular" />
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**
Output attribute from an estimator or any scikit object. Common attributes are :

- ``estimator.`` *feature_importances_*

- ``RFE``. *ranking_*

- ``RFECV``. *grid_scores_*

- ``GridSearchCV``. *best_estimator_*

        ]]>
    </help>
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation" />
        <expand macro="xgboost_citation" />
        <expand macro="imblearn_citation" />
    </expand>
</tool>