comparison pycaret_train.xml @ 4:4aa511539199 draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
author goeckslab
date Tue, 03 Jun 2025 19:31:16 +0000
parents 009b18a75dc3
children
comparison
equal deleted inserted replaced
3:02f7746e7772 4:4aa511539199
1 <tool id="pycaret_compare" name="PyCaret Model Comparison" version="@VERSION@" profile="@PROFILE@"> 1 <tool id="pycaret_compare" name="Tabular Learner" version="@VERSION@" profile="@PROFILE@">
2 <description>compares different machine learning models on a dataset using PyCaret. Do feature analyses using Random Forest and LightGBM. </description> 2 <description>applies and evaluates multiple machine learning models on a tabular dataset</description>
3 <macros> 3 <macros>
4 <import>pycaret_macros.xml</import> 4 <import>pycaret_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
7 <command> 7 <command>
51 #end if 51 #end if
52 --model_type '$model_type' 52 --model_type '$model_type'
53 ]]> 53 ]]>
54 </command> 54 </command>
55 <inputs> 55 <inputs>
56 <param name="input_file" type="data" format="csv,tabular" label="Train Dataset (CSV or TSV)" /> 56 <param name="input_file" type="data" format="csv,tabular" label="Tabular Input Dataset" />
57 <param name="test_file" type="data" format="csv,tabular" optional="true" label="Test Dataset (CSV or TSV)" 57 <param name="test_file" type="data" format="csv,tabular" optional="true" label="Tabular Test Dataset"
58 help="If a test set is not provided, 58 help="If a test dataset is not provided,
59 the selected training set will be split into training, validation, and test sets. 59 the input dataset will be split into training, validation, and test sets.
60 If a test set is provided, the training set will only be split into training and validation sets. 60 If a test set is provided, the input dataset will be split into training and validation sets.
61 BTW, cross-validation is always applied by default." /> 61 Cross-validation is applied by default during training." />
62 <param name="target_feature" multiple="false" type="data_column" use_header_names="true" data_ref="input_file" label="Select the target column:" /> 62 <param name="target_feature" multiple="false" type="data_column" use_header_names="true" data_ref="input_file" label="Select the target column:" />
63 <conditional name="model_selection"> 63 <conditional name="model_selection">
64 <param name="model_type" type="select" label="Task"> 64 <param name="model_type" type="select" label="Task">
65 <option value="classification">classification</option> 65 <option value="classification">classification</option>
66 <option value="regression">regression</option> 66 <option value="regression">regression</option>
122 <param name="customize_defaults" type="select" label="Customize Default Settings?" help="Select yes if you want to customize the default settings of the experiment."> 122 <param name="customize_defaults" type="select" label="Customize Default Settings?" help="Select yes if you want to customize the default settings of the experiment.">
123 <option value="false" selected="true">No</option> 123 <option value="false" selected="true">No</option>
124 <option value="true">Yes</option> 124 <option value="true">Yes</option>
125 </param> 125 </param>
126 <when value="true"> 126 <when value="true">
127 <param name="train_size" type="float" value="0.7" min="0.1" max="0.9" label="Train Size" help="Proportion of the dataset to include in the train split." /> 127 <param name="train_size" type="float" value="0.7" min="0.1" max="0.9" label="Train Size" help="Proportion of the input dataset to include in the train split." />
128 <param name="normalize" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Normalize Data" help="Whether to normalize data before training." /> 128 <param name="normalize" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Normalize Data" help="Whether to normalize data before training." />
129 <param name="feature_selection" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Feature Selection" help="Whether to perform feature selection." /> 129 <param name="feature_selection" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Feature Selection" help="Whether to perform feature selection." />
130 <conditional name="cross_validation"> 130 <conditional name="cross_validation">
131 <param name="enable_cross_validation" type="select" label="Enable Cross Validation?" help="Select whether to enable cross-validation. Default: Yes" > 131 <param name="enable_cross_validation" type="select" label="Enable Cross Validation?" help="Select whether to enable cross-validation." >
132 <option value="false" >No</option> 132 <option value="false" >No</option>
133 <option value="true" selected="true">Yes</option> 133 <option value="true" selected="true">Yes</option>
134 </param> 134 </param>
135 <when value="true"> 135 <when value="true">
136 <param name="cross_validation_folds" type="integer" value="10" min="2" max="20" label="Cross Validation Folds" help="Number of folds to use for cross-validation. Default: 10" /> 136 <param name="cross_validation_folds" type="integer" value="10" min="2" max="20" label="Cross Validation Folds" help="Number of folds to use for cross-validation." />
137 </when> 137 </when>
138 <when value="false"> 138 <when value="false">
139 <!-- No additional parameters to show if the user selects 'No' --> 139 <!-- No additional parameters to show if the user selects 'No' -->
140 </when> 140 </when>
141 </conditional> 141 </conditional>
142 <param name="remove_outliers" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Outliers" help="Whether to remove outliers from the dataset before training. Default: False" /> 142 <param name="remove_outliers" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Outliers" help="Whether to remove outliers from the input dataset before training." />
143 <param name="remove_multicollinearity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Multicollinearity" help="Whether to remove multicollinear features before training. Default: False" /> 143 <param name="remove_multicollinearity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Remove Multicollinearity" help="Whether to remove multicollinear features before training." />
144 <param name="polynomial_features" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Polynomial Features" help="Whether to create polynomial features before training. Default: False" /> 144 <param name="polynomial_features" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Polynomial Features" help="Whether to create polynomial features before training." />
145 <param name="fix_imbalance" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Fix Imbalance" help="ONLY for classification! Whether to use SMOTE or similar methods to fix imbalance in the dataset. Default: False" /> 145 <param name="fix_imbalance" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Fix Imbalance" help="ONLY for classification! Whether to use SMOTE or similar methods to fix imbalance in the input dataset." />
146 </when> 146 </when>
147 <when value="false"> 147 <when value="false">
148 <!-- No additional parameters to show if the user selects 'No' --> 148 <!-- No additional parameters to show if the user selects 'No' -->
149 </when> 149 </when>
150 </conditional> 150 </conditional>
151 </inputs> 151 </inputs>
152 <outputs> 152 <outputs>
153 <data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} analysis report on ${on_string}"/>
153 <data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" /> 154 <data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" />
154 <data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} Comparison result on ${on_string}"/> 155 <data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The parameters of the best model on ${on_string}" hidden="true" />
155 <data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The prams of the best model on ${on_string}" hidden="true" />
156 </outputs> 156 </outputs>
157 <tests> 157 <tests>
158 <test> 158 <test>
159 <param name="input_file" value="pcr.tsv"/> 159 <param name="input_file" value="pcr.tsv"/>
160 <param name="target_feature" value="11"/> 160 <param name="target_feature" value="11"/>