Mercurial > repos > goeckslab > galaxy_pycaret
comparison pycaret_train.xml @ 0:1bc26b9636d2 draft default tip
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit 5089a5dffc154c8202624cfbd5f1be0f36a9f0cc
| author | goeckslab |
|---|---|
| date | Wed, 11 Dec 2024 03:29:00 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1bc26b9636d2 |
|---|---|
| 1 <tool id="pycaret_compare" name="PyCaret Model Comparison" version="@VERSION@" profile="@PROFILE@"> | |
| 2 <description>compares different machine learning models on a dataset using PyCaret. Do feature analyses using Random Forest and LightGBM. </description> | |
| 3 <macros> | |
| 4 <import>pycaret_macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="python_requirements" /> | |
| 7 <command> | |
| 8 <![CDATA[ ## Parameters declared inside a conditional are referenced by their fully qualified path. | |
| 9 python '$__tool_directory__/pycaret_train.py' --input_file '$input_file' --target_col '$target_feature' --output_dir "`pwd`" --random_seed '$random_seed' | |
| 10 #if $model_selection.model_type == "classification" | |
| 11 #if $model_selection.classification_models | |
| 12 --models '$model_selection.classification_models' | |
| 13 #end if | |
| 14 #end if | |
| 15 #if $model_selection.model_type == "regression" | |
| 16 #if $model_selection.regression_models | |
| 17 --models '$model_selection.regression_models' | |
| 18 #end if | |
| 19 #end if | |
| 20 #if $advanced_settings.customize_defaults == "true" | |
| 21 #if $advanced_settings.train_size | |
| 22 --train_size '$advanced_settings.train_size' | |
| 23 #end if | |
| 24 #if $advanced_settings.normalize | |
| 25 --normalize | |
| 26 #end if | |
| 27 #if $advanced_settings.feature_selection | |
| 28 --feature_selection | |
| 29 #end if | |
| 30 #if $advanced_settings.cross_validation.enable_cross_validation == "true" | |
| 31 --cross_validation | |
| 32 #if $advanced_settings.cross_validation.cross_validation_folds | |
| 33 --cross_validation_folds '$advanced_settings.cross_validation.cross_validation_folds' | |
| 34 #end if | |
| 35 #end if | |
| 36 #if $advanced_settings.remove_outliers | |
| 37 --remove_outliers | |
| 38 #end if | |
| 39 #if $advanced_settings.remove_multicollinearity | |
| 40 --remove_multicollinearity | |
| 41 #end if | |
| 42 #if $advanced_settings.polynomial_features | |
| 43 --polynomial_features | |
| 44 #end if | |
| 45 #if $advanced_settings.fix_imbalance | |
| 46 --fix_imbalance | |
| 47 #end if | |
| 48 #end if | |
| 49 #if $test_file | |
| 50 --test_file '$test_file' | |
| 51 #end if | |
| 52 --model_type '$model_selection.model_type' | |
| 53 ]]> | |
| 54 </command> | |
| 55 <inputs> | |
| 56 <param name="input_file" type="data" format="csv,tabular" label="Train Dataset (CSV or TSV)" /> | |
| 57 <param name="test_file" type="data" format="csv,tabular" optional="true" label="Test Dataset (CSV or TSV)" | |
| 58 help="If a test set is not provided, | |
| 59 the selected training set will be split into training, validation, and test sets. | |
| 60 If a test set is provided, the training set will only be split into training and validation sets. | |
| 61 BTW, cross-validation is always applied by default." /> | |
| 62 <param name="target_feature" multiple="false" type="data_column" use_header_names="true" data_ref="input_file" label="Select the target column:" /> | |
| 63 <conditional name="model_selection"> | |
| 64 <param name="model_type" type="select" label="Task"> | |
| 65 <option value="classification">classification</option> | |
| 66 <option value="regression">regression</option> | |
| 67 </param> | |
| 68 <when value="classification"> | |
| 69 <param name="classification_models" type="select" multiple="true" label="Only Select Classification Models if you don't want to compare all models"> | |
| 70 <option value="lr">Logistic Regression</option> | |
| 71 <option value="knn">K Neighbors Classifier</option> | |
| 72 <option value="nb">Naive Bayes</option> | |
| 73 <option value="dt">Decision Tree Classifier</option> | |
| 74 <option value="svm">SVM - Linear Kernel</option> | |
| 75 <option value="rbfsvm">SVM - Radial Kernel</option> | |
| 76 <option value="gpc">Gaussian Process Classifier</option> | |
| 77 <option value="mlp">MLP Classifier</option> | |
| 78 <option value="ridge">Ridge Classifier</option> | |
| 79 <option value="rf">Random Forest Classifier</option> | |
| 80 <option value="qda">Quadratic Discriminant Analysis</option> | |
| 81 <option value="ada">Ada Boost Classifier</option> | |
| 82 <option value="gbc">Gradient Boosting Classifier</option> | |
| 83 <option value="lda">Linear Discriminant Analysis</option> | |
| 84 <option value="et">Extra Trees Classifier</option> | |
| 85 <option value="xgboost">Extreme Gradient Boosting</option> | |
| 86 <option value="lightgbm">Light Gradient Boosting Machine</option> | |
| 87 <option value="catboost">CatBoost Classifier</option> | |
| 88 </param> | |
| 89 </when> | |
| 90 <when value="regression"> | |
| 91 <param name="regression_models" type="select" multiple="true" label="Only Select Regression Models if you don't want to compare all models"> | |
| 92 <option value="lr">Linear Regression</option> | |
| 93 <option value="lasso">Lasso Regression</option> | |
| 94 <option value="ridge">Ridge Regression</option> | |
| 95 <option value="en">Elastic Net</option> | |
| 96 <option value="lar">Least Angle Regression</option> | |
| 97 <option value="llar">Lasso Least Angle Regression</option> | |
| 98 <option value="omp">Orthogonal Matching Pursuit</option> | |
| 99 <option value="br">Bayesian Ridge</option> | |
| 100 <option value="ard">Automatic Relevance Determination</option> | |
| 101 <option value="par">Passive Aggressive Regressor</option> | |
| 102 <option value="ransac">Random Sample Consensus</option> | |
| 103 <option value="tr">TheilSen Regressor</option> | |
| 104 <option value="huber">Huber Regressor</option> | |
| 105 <option value="kr">Kernel Ridge</option> | |
| 106 <option value="svm">Support Vector Regression</option> | |
| 107 <option value="knn">K Neighbors Regressor</option> | |
| 108 <option value="dt">Decision Tree Regressor</option> | |
| 109 <option value="rf">Random Forest Regressor</option> | |
| 110 <option value="et">Extra Trees Regressor</option> | |
| 111 <option value="ada">AdaBoost Regressor</option> | |
| 112 <option value="gbr">Gradient Boosting Regressor</option> | |
| 113 <option value="mlp">MLP Regressor</option> | |
| 114 <option value="xgboost">Extreme Gradient Boosting</option> | |
| 115 <option value="lightgbm">Light Gradient Boosting Machine</option> | |
| 116 <option value="catboost">CatBoost Regressor</option> | |
| 117 </param> | |
| 118 </when> | |
| 119 </conditional> | |
| 120 <param name="random_seed" type="integer" value="42" label="Random Seed" help="Random seed for reproducibility." /> | |
| 121 <conditional name="advanced_settings"> | |
| 122 <param name="customize_defaults" type="select" label="Customize Default Settings?" help="Select yes if you want to customize the default settings of the experiment."> | |
| 123 <option value="false" selected="true">No</option> | |
| 124 <option value="true">Yes</option> | |
| 125 </param> | |
| 126 <when value="true"> | |
| 127 <param name="train_size" type="float" value="0.7" min="0.1" max="0.9" label="Train Size" help="Proportion of the dataset to include in the train split." /> | |
| 128 <param name="normalize" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Normalize Data" help="Whether to normalize data before training." /> | |
| 129 <param name="feature_selection" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Feature Selection" help="Whether to perform feature selection." /> | |
| 130 <conditional name="cross_validation"> | |
| 131 <param name="enable_cross_validation" type="select" label="Enable Cross Validation?" help="Select whether to enable cross-validation. Default: Yes" > | |
| 132 <option value="false" >No</option> | |
| 133 <option value="true" selected="true">Yes</option> | |
| 134 </param> | |
| 135 <when value="true"> | |
| 136 <param name="cross_validation_folds" type="integer" value="10" min="2" max="20" label="Cross Validation Folds" help="Number of folds to use for cross-validation. Default: 10" /> | |
| 137 </when> | |
| 138 <when value="false"> | |
| 139 <!-- No additional parameters to show if the user selects 'No' --> | |
| 140 </when> | |
| 141 </conditional> | |
| 142 <param name="remove_outliers" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Outliers" help="Whether to remove outliers from the dataset before training. Default: False" /> | |
| 143 <param name="remove_multicollinearity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Multicollinearity" help="Whether to remove multicollinear features before training. Default: False" /> | |
| 144 <param name="polynomial_features" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Polynomial Features" help="Whether to create polynomial features before training. Default: False" /> | |
| 145 <param name="fix_imbalance" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Fix Imbalance" help="ONLY for classification! Whether to use SMOTE or similar methods to fix imbalance in the dataset. Default: False" /> | |
| 146 </when> | |
| 147 <when value="false"> | |
| 148 <!-- No additional parameters to show if the user selects 'No' --> | |
| 149 </when> | |
| 150 </conditional> | |
| 151 </inputs> | |
| 152 <outputs> | |
| 153 <data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" /> | |
| 154 <data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} Comparison result on ${on_string}"/> | |
| 155 <data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The params of the best model on ${on_string}" hidden="true" /> | |
| 156 </outputs> | |
| 157 <tests> | |
| 158 <test> <!-- Classification on pcr.tsv with customized settings (train size, normalization, feature selection, 5-fold CV, outlier and multicollinearity removal). --> | |
| 159 <param name="input_file" value="pcr.tsv"/> | |
| 160 <param name="target_feature" value="11"/> | |
| 161 <param name="model_type" value="classification"/> | |
| 162 <param name="random_seed" value="42"/> | |
| 163 <param name="customize_defaults" value="true"/> | |
| 164 <param name="train_size" value="0.8"/> | |
| 165 <param name="normalize" value="true"/> | |
| 166 <param name="feature_selection" value="true"/> | |
| 167 <param name="enable_cross_validation" value="true"/> | |
| 168 <param name="cross_validation_folds" value="5"/> | |
| 169 <param name="remove_outliers" value="true"/> | |
| 170 <param name="remove_multicollinearity" value="true"/> | |
| 171 <output name="model" file="expected_model_classification_customized.h5" compare="sim_size"/> | |
| 172 <output name="comparison_result" file="expected_comparison_result_classification_customized.html" compare="sim_size" /> | |
| 173 <output name="best_model_csv" file="expected_best_model_classification_customized.csv" /> | |
| 174 </test> | |
| 175 <test> <!-- Classification on pcr.tsv with default settings. --> | |
| 176 <param name="input_file" value="pcr.tsv"/> | |
| 177 <param name="target_feature" value="11"/> | |
| 178 <param name="model_type" value="classification"/> | |
| 179 <param name="random_seed" value="42"/> | |
| 180 <output name="model" file="expected_model_classification.h5" compare="sim_size"/> | |
| 181 <output name="comparison_result" file="expected_comparison_result_classification.html" compare="sim_size" /> | |
| 182 <output name="best_model_csv" file="expected_best_model_classification.csv" /> | |
| 183 </test> | |
| 184 <test> <!-- Regression on auto-mpg.tsv with default settings. --> | |
| 185 <param name="input_file" value="auto-mpg.tsv"/> | |
| 186 <param name="target_feature" value="1"/> | |
| 187 <param name="model_type" value="regression"/> | |
| 188 <param name="random_seed" value="42"/> | |
| 189 <output name="model" file="expected_model_regression.h5" compare="sim_size" /> | |
| 190 <output name="comparison_result" file="expected_comparison_result_regression.html" compare="sim_size" /> | |
| 191 <output name="best_model_csv" file="expected_best_model_regression.csv" /> | |
| 192 </test> | |
| 193 </tests> | |
| 194 <help> | |
| 195 This tool uses PyCaret to train and evaluate machine learning models. | |
| 196 It compares different models on a dataset and provides the best model based on the performance metrics. | |
| 197 | |
| 198 **Outputs** | |
| 199 | |
| 200 - **Model**: The best model trained on the dataset in h5 format. | |
| 201 | |
| 202 | |
| 203 - **Comparison Result**: The comparison result of different models in html format. | |
| 204 It contains the performance metrics of different models, plots of the best model | |
| 205 on the testing set (or part of the training set if a separate test set is not uploaded), and feature analysis plots. | |
| 206 | |
| 207 </help> | |
| 208 <expand macro="macro_citations" /> | |
| 209 </tool> |
