Mercurial > repos > iuc > deepmicro
changeset 2:2d20b3a1babd draft default tip
planemo upload for repository https://github.com/paulzierep/DeepMicro commit 574cb8c241e18a15f006bf307235c7dd23f07c69
author | iuc |
---|---|
date | Tue, 23 Jul 2024 15:56:32 +0000 |
parents | c58c1a99578b |
children | |
files | deepmicro.xml macros.xml |
diffstat | 2 files changed, 81 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/deepmicro.xml Sat Jun 10 21:59:04 2023 +0000 +++ b/deepmicro.xml Tue Jul 23 15:56:32 2024 +0000 @@ -18,9 +18,20 @@ #for $params in $mode.parameter_set: #if $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": - DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && + DM.py -r 1 + -cd features.csv + '$params.rl_type.rl_type_choice' + --save_rep + --no_clf + -t \${GALAXY_SLOTS:-8} && #else: - DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && + DM.py -r 1 + -cd features.csv + '$params.rl_type.rl_type_choice' + -dm '$params.rl_type.dm' + --save_rep + --no_clf + -t \${GALAXY_SLOTS:-8} && #end if #end for @@ -30,14 +41,29 @@ #for $params in $mode.parameter_set: + ## general args + DM.py + -r '$mode.repeat' + -cd features.csv + -cl labels.csv + --save_rep + -m '$params.rl_type.classifier' + --scoring '$mode.scoring' + -f '$mode.folds' + -t \${GALAXY_SLOTS:-8} + ## only train classifier without encoding #if $params.rl_type.rl_type_choice == "no_rl": - DM.py -r 1 -cd features.csv -cl labels.csv --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && + && echo "Only train Clf - no encoding!" + ## add rl type #elif $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": - DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && + '$params.rl_type.rl_type_choice' + ## add rl type and dm options #else: - DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && + '$params.rl_type.rl_type_choice' + -dm '$params.rl_type.dm' #end if + && #end for #end if @@ -59,9 +85,9 @@ <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning" > <option value="--pca">PCA</option> <option value="--rp">Random Projection</option> - <option value="--ae">Autoencoder or Deep Autoencoder</option> - <option value="--vae">Variational Autoencoder</option> - <option value="--cae">Convolutional Autoencoder</option> + <option value="--ae">Shallow Autoencoder or Deep Autoencoder (SAE, DAE)</option> + <option value="--vae">Variational Autoencoder (VAE)</option> + <option value="--cae">Convolutional Autoencoder (CAE)</option> </param> <when value="--pca"/> <when value="--rp"/> @@ -79,14 +105,23 @@ </when> <when value="e_and_c"> <param argument="--class_labels" type="data" format="tabular" label="Class labels" help="Dataset containing the class labels corresponding to the features"/> + <param name="scoring" type="select" label="Scoring function for the classifiere" help="The classifiere will be optimized for this scoring function." > + <option value="roc_auc">ROC AUC</option> + <option value="accuracy">Accuracy</option> + <option value="f1">F1 Score</option> + <option value="recall">Recall</option> + <option value="precision">Precision</option> + </param> + <param name="folds" type="integer" value="5" label="Cross-validation folds" min="2" max="10" help="The number of folds for cross-validation in the tranining set"/> + <param name="repeat" type="integer" value="1" label="Repeat the experiment with different seed" min="1" max="5" help="Repeat the experiment with different seeds. Leads to a different train / test split each time."/> <repeat name="parameter_set" title="Parameter Set"> <conditional name="rl_type"> <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning. `Train on input` trains the classifier on the input features without representation learning" > <option value="--pca">PCA</option> <option value="--rp">Random Projection</option> - <option value="--ae">Autoencoder or Deep Autoencoder</option> - <option value="--vae">Variational Autoencoder</option> - <option value="--cae">Convolutional Autoencoder</option> + <option value="--ae">Shallow Autoencoder or Deep Autoencoder (SAE, DAE)</option> + <option value="--vae">Variational Autoencoder (VAE)</option> + <option value="--cae">Convolutional Autoencoder (CAE)</option> <option value="no_rl">Train on input</option> </param> <when value="no_rl"> @@ -126,13 +161,19 @@ <filter>mode["mode_type"] == "only_encoding"</filter> <discover_datasets directory="results" pattern="(?P<designation>.*)_rep\.csv" format="tabular" visible="false" /> </collection> + <collection name="model" type="list" label="Keras Models"> + <!-- the encoded features generated by the tool are only for the training set, this is not very useful, therefore omitting + todo change tool do export features complete dataset also when classification is performed --> + <filter>mode["mode_type"] == "only_encoding"</filter> + <discover_datasets directory="." pattern="(?P<designation>.*).h5" format="data" visible="false" /> + </collection> </outputs> <tests> <!-- only encoding --> <!-- test one parameter sets --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <param name="mode_type" value="only_encoding" /> <param name="features" value="UserDataExample.csv" /> <param name="rl_type_choice" value="--ae" /> @@ -148,7 +189,7 @@ </output_collection> </test> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <param name="mode_type" value="only_encoding" /> <param name="features" value="UserDataExample.csv" /> <param name="rl_type_choice" value="--pca" /> @@ -162,8 +203,27 @@ </output_collection> </test> + <!-- only encoding --> + + <test expect_num_outputs="2"> + <param name="mode_type" value="only_encoding" /> + <param name="features" value="UserDataExample.csv" /> + <param name="rl_type_choice" value="--ae" /> + <param name="dm" value="40" /> + + <output_collection name="encoded_features" type="list"> + <!-- output is non determinisitc --> + <element name="AE[40]_features" ftype="tabular" > + <assert_contents> + <has_n_lines n="20"/> + <!-- <has_n_columns n="40" sep="," /> --> + </assert_contents> + </element> + </output_collection> + </test> + <!-- test multiple parameter sets --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <param name="features" value="UserDataExample.csv" /> <conditional name="mode"> <param name="mode_type" value="only_encoding" /> @@ -202,13 +262,16 @@ <!-- encoding and clf --> <!-- test one parameter set --> - + <!-- test additional parameters scoring / folds and repeat --> <test expect_num_outputs="1"> <param name="features" value="UserDataExample.csv" /> <param name="mode_type" value="e_and_c" /> <param name="class_labels" value="UserLabelExample.csv" /> <param name="rl_type_choice" value="--vae" /> <param name="dm" value="40" /> + <param name="scoring" value="roc_auc" /> + <param name="folds" value="2" /> + <param name="repeat" value="2" /> <param name="classifier" value="rf" /> <output ftype="tabular" name="results" > <assert_contents> @@ -218,6 +281,8 @@ </test> + + <!-- no rl --> <test expect_num_outputs="1"> <param name="features" value="UserDataExample.csv" /> <param name="mode_type" value="e_and_c" />
--- a/macros.xml Sat Jun 10 21:59:04 2023 +0000 +++ b/macros.xml Tue Jul 23 15:56:32 2024 +0000 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">1.4</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@VERSION_SUFFIX@">2</token> <token name="@PROFILE@">22.05</token> <xml name="biotools"> <xrefs>