Mercurial > repos > fabio > chopin2
changeset 0:89fb0de13457 draft default tip
Uploading wrapper for chopin2
| author | fabio | 
|---|---|
| date | Thu, 19 May 2022 22:13:24 +0000 | 
| parents | |
| children | |
| files | chopin2.xml | 
| diffstat | 1 files changed, 163 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chopin2.xml Thu May 19 22:13:24 2022 +0000
@@ -0,0 +1,163 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool name="chopin2" id="chopin2" version="1.0.6">
+    <description>Domain-Agnostic Supervised Learning with Hyperdimensional Computing</description>
+
+    <!-- Tool developer -->
+    <creator>
+        <person givenName="Fabio" familyName="Cumbo"
+                url="https://fabio-cumbo.github.io/"
+                email="fabio.cumbo@gmail.com" />
+    </creator>
+
+    <!-- Define dependencies -->
+    <requirements>
+        <requirement type="package" version="1.0.6">chopin2</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code">
+<![CDATA[
+        ln -s '${dataset}' &&
+        ## chopin2 is run on the link created above, referenced by its basename in the working directory
+        chopin2
+
+            --dataset "`basename '${dataset}'`"
+            --dimensionality ${dimensionality}
+            --levels ${levels}
+            --retrain ${retrain}
+            --stop
+            --crossv_k ${folds}
+            ## Feature selection options are passed only when the user enabled feature selection
+            #if str($feature_selection.enable_fs) == "true":
+                --select_features
+                --group_min ${feature_selection.group_min}
+                --accuracy_threshold ${feature_selection.accuracy_threshold}
+                --accuracy_uncertainty_perc ${feature_selection.accuracy_uncertainty_perc}
+            #end if
+
+            --dump
+            --cleanup
+            --nproc "\${GALAXY_SLOTS:-4}"
+            --verbose
+]]>
+    </command>
+
+    <inputs>
+        <!-- Select a dataset -->
+        <param name="dataset" type="data" format="csv"
+               label="Select a dataset"
+               help="Input dataset with features on columns and observations on rows. Last column must contain classes." />
+
+        <!-- Vector dimensionality -->
+        <param name="dimensionality" type="integer" value="10000" min="100"
+               label="Vectors dimensionality"
+               help="Size of hypervectors is usually 10,000 in vector-symbolic architectures. However, lower values could work
+                     with small datasets in terms of number of features and observations. Please note that you may require
+                     to increase this number in case of datasets with a huge number of features." />
+
+        <!-- Number of levels -->
+        <param name="levels" type="integer" value="1000" min="2"
+               label="Levels"
+               help="Number of level vectors. You may consider to look at the distribution of your data in order to choose
+                     the most appropriate value." />
+
+        <!-- Number of retraining iterations -->
+        <param name="retrain" type="integer" value="0" min="0"
+               label="Model retraining iterations"
+               help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise." />
+
+        <!-- Number of folds for cross-validation -->
+        <param name="folds" type="integer" value="2" min="2"
+               label="Number of folds for cross-validation"
+               help="This tool makes use of k-folds cross-validation to evaluate the accuracy of the hyperdimensional model.
+                     Make sure to choose a good number of folds for validating the classification model. Please note that higher number
+                     of folds could significantly increase the running time." />
+
+        <!-- Optional backward feature selection; its tuning parameters are shown only when enabled -->
+        <conditional name="feature_selection">
+            <!-- Enable feature selection -->
+            <param name="enable_fs" type="boolean" checked="false" truevalue="true" falsevalue="false"
+                   label="Enable feature selection"
+                   help="If selected, this will extract a set of features with the better discriminative power among classes.
+                         The feature selection algorithm is defined as a backward variable selection method." />
+
+            <when value="true">
+                <!-- Minimum group size -->
+                <param name="group_min" type="integer" value="1" min="1"
+                       label="Minimum number of selected features"
+                       help="Tool will stop removing features if its number will reach this value." />
+
+                <!-- Accuracy threshold -->
+                <param name="accuracy_threshold" type="float" value="60.0" min="0.0" max="100.0"
+                       label="Accuracy threshold"
+                       help="Stop the execution if the best accuracy reached for a group of features is lower than this value." />
+
+                <!-- Accuracy uncertainty percentage -->
+                <param name="accuracy_uncertainty_perc" type="float" value="5.0" min="0.0" max="100.0"
+                       label="Accuracy uncertainty percentage"
+                       help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage." />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <!-- Output summary file -->
+        <data format="txt" name="summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="summary.txt" />
+        <!-- Output file with selected features -->
+        <data format="txt" name="selection" label="${tool.name} on ${on_string}: Selection" from_work_dir="selection.txt">
+            <filter>feature_selection["enable_fs"]</filter>
+        </data>
+    </outputs>
+
+    <help><![CDATA[
+This is a domain-agnostic supervised learning tool that exploits the Hyperdimensional Computing paradigm to encode and compare data.
+
+-----
+
+**Input file**
+
+The input is a CSV file representing a matrix with the observations on the rows and features on columns.
+The last column contains classes associated to the observations.
+
+The tool doesn't support datasets with missing values. It also supports numerical datasets only.
+
+Please, refer to the example below to know how to structure your dataset:
+
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+|                  | **Feature1** | **Feature2** | **Feature3** | ...  | **FeatureM** | **Class**   |
++==================+==============+==============+==============+======+==============+=============+
+| **Observation1** | 0.68         | 1.97         | 0.02         | ...  | 0.01         | Case        |
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+| **Observation2** | 0.52         | 0.60         | 1.16         | ...  | 0.07         | Case        |
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+| **Observation3** | 0.56         | 0.01         | 0.50         | ...  | 1.16         | Control     |
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+| ...              | ...          | ...          | ...          | ...  | ...          | ...         |
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+| **ObservationN** | 0.05         | 1.86         | 0.03         | ...  | 2.83         | Control     |
++------------------+--------------+--------------+--------------+------+--------------+-------------+
+
+-----
+
+**Output**
+
+The output is a summary table with information about the accuracy of the hyperdimensional model and
+the number of retraining iterations that were required to achieve that level of accuracy.
+
+In case the feature selection is enabled, it also returns a file with the list of selected features
+that come out from the hyperdimensional model that reached the best accuracy.
+
+-----
+
+.. class:: infomark
+
+**Notes**
+
+Please visit the official GitHub repository_ for other information about `chopin2`.
+
+.. _repository: https://github.com/fabio-cumbo/chopin2
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.3390/a13090233</citation>
+    </citations>
+</tool>
\ No newline at end of file
