comparison chopin2.xml @ 0:89fb0de13457 draft default tip

Uploading wrapper for chopin2
author fabio
date Thu, 19 May 2022 22:13:24 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89fb0de13457
1 <?xml version="1.0"?>
2 <tool name="chopin2" id="chopin2" version="1.0.6">
3 <description>Domain-Agnostic Supervised Learning with Hyperdimensional Computing</description>
4
5 <!-- Tool developer (name, homepage and contact e-mail) -->
6 <creator>
7 <person givenName="Fabio" familyName="Cumbo"
8 url="https://fabio-cumbo.github.io/"
9 email="fabio.cumbo@gmail.com" />
10 </creator>
11
12 <!-- Define dependencies: the chopin2 package, pinned to version 1.0.6 -->
13 <requirements>
14 <requirement type="package" version="1.0.6">chopin2</requirement>
15 </requirements>
16
17 <command detect_errors="exit_code">
18 <![CDATA[
19 ln -s '${dataset}' &&
20 ## The dataset path is single-quoted and the two steps are chained so that unusual paths are passed safely and chopin2 never runs when the symlink could not be created.
21 chopin2
22
23 --dataset "`basename '${dataset}'`"
24 --dimensionality ${dimensionality}
25 --levels ${levels}
26 --retrain ${retrain}
27 --stop
28 --crossv_k ${folds}
29 ## Feature selection flags are passed only when the user enabled the option.
30 #if $feature_selection.enable_fs == "true":
31 --select_features
32 --group_min ${feature_selection.group_min}
33 --accuracy_threshold ${feature_selection.accuracy_threshold}
34 --accuracy_uncertainty_perc ${feature_selection.accuracy_uncertainty_perc}
35 #end if
36
37 --dump
38 --cleanup
39 --nproc "\${GALAXY_SLOTS:-4}"
40 --verbose
41 ]]>
42 </command>
43
44 <inputs>
45 <!-- Select a dataset -->
46 <param name="dataset" type="data" format="csv"
47 label="Select a dataset"
48 help="Input dataset with features on columns and observations on rows. Last column must contain classes." />
49
50 <!-- Vector dimensionality -->
51 <param name="dimensionality" type="integer" value="10000" min="100"
52 label="Vectors dimensionality"
53 help="Size of hypervectors is usually 10,000 in vector-symbolic architectures. However, lower values could work
54 with small datasets in terms of number of features and observations. Please note that you may require
55 to increase this number in case of datasets with a huge number of features." />
56
57 <!-- Number of levels -->
58 <param name="levels" type="integer" value="1000" min="2"
59 label="Levels"
60 help="Number of level vectors. You may consider to look at the distribution of your data in order to choose
61 the most appropriate value." />
62
63 <!-- Number of retraining iterations -->
64 <param name="retrain" type="integer" value="0" min="0"
65 label="Model retraining iterations"
66 help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise." />
67
68 <!-- Number of folds for cross-validation -->
69 <param name="folds" type="integer" value="2" min="2"
70 label="Number of folds for cross-validation"
71 help="This tool makes use of k-folds cross-validation to evaluate the accuracy of the hyperdimensional model.
72 Make sure to choose a good number of folds for validating the classification model. Please note that higher number
73 of folds could significantly increase the running time." />
74
75 <!-- Optional feature selection: the parameters below are available only when the checkbox is enabled -->
76 <conditional name="feature_selection">
77 <!-- Enable feature selection -->
78 <param name="enable_fs" type="boolean" checked="false" truevalue="true" falsevalue="false"
79 label="Enable feature selection"
80 help="If selected, this will extract a set of features with the better discriminative power among classes.
81 The feature selection algorithm is defined as a backward variable selection method." />
82
83 <when value="true">
84 <!-- Minimum group size -->
85 <param name="group_min" type="integer" value="1" min="1"
86 label="Minimum number of selected features"
87 help="Tool will stop removing features if its number will reach this value." />
88
89 <!-- Accuracy threshold -->
90 <param name="accuracy_threshold" type="float" value="60.0" min="0.0" max="100.0"
91 label="Accuracy threshold"
92 help="Stop the execution if the best accuracy reached for a group of features is lower than this value." />
93
94 <!-- Accuracy uncertainty percentage -->
95 <param name="accuracy_uncertainty_perc" type="float" value="5.0" min="0.0" max="100.0"
96 label="Accuracy uncertainty percentage"
97 help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage." />
98 </when>
99 </conditional>
100 </inputs>
101
102 <outputs>
103 <!-- Output summary file, collected from summary.txt in the working directory -->
104 <data format="txt" name="summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="summary.txt" />
105 <!-- Output file with selected features, collected from selection.txt; filtered on the feature selection option -->
106 <data format="txt" name="selection" label="${tool.name} on ${on_string}: Selection" from_work_dir="selection.txt">
107 <filter>feature_selection["enable_fs"]</filter>
108 </data>
109 </outputs>
110
111 <help><![CDATA[
112 This is a domain-agnostic supervised learning tool that exploits the Hyperdimensional Computing paradigm to encode and compare data.
113
114 -----
115
116 **Input file**
117
118 The input is a CSV file representing a matrix with the observations on the rows and features on columns.
119 The last column contains the classes associated with the observations.
120
121 The tool supports numerical datasets only, and it does not support datasets with missing values.
122
123 Please refer to the example below to see how to structure your dataset:
124
125 +------------------+--------------+--------------+--------------+------+--------------+-------------+
126 | | **Feature1** | **Feature2** | **Feature3** | ... | **FeatureM** | **Class** |
127 +==================+==============+==============+==============+======+==============+=============+
128 | **Observation1** | 0.68 | 1.97 | 0.02 | ... | 0.01 | Case |
129 +------------------+--------------+--------------+--------------+------+--------------+-------------+
130 | **Observation2** | 0.52 | 0.60 | 1.16 | ... | 0.07 | Case |
131 +------------------+--------------+--------------+--------------+------+--------------+-------------+
132 | **Observation3** | 0.56 | 0.01 | 0.50 | ... | 1.16 | Control |
133 +------------------+--------------+--------------+--------------+------+--------------+-------------+
134 | ... | ... | ... | ... | ... | ... | ... |
135 +------------------+--------------+--------------+--------------+------+--------------+-------------+
136 | **ObservationN** | 0.05 | 1.86 | 0.03 | ... | 2.83 | Control |
137 +------------------+--------------+--------------+--------------+------+--------------+-------------+
138
139 -----
140
141 **Output**
142
143 The output is a summary table with information about the accuracy of the hyperdimensional model and
144 the number of retraining iterations that were required to achieve that level of accuracy.
145
146 In case the feature selection is enabled, it also returns a file with the list of selected features
147 that come out from the hyperdimensional model that reached the best accuracy.
148
149 -----
150
151 .. class:: infomark
152
153 **Notes**
154
155 Please visit the official GitHub repository_ for other information about `chopin2`.
156
157 .. _repository: https://github.com/fabio-cumbo/chopin2
158 ]]></help>
159 <!-- Reference to cite for this tool, identified by its DOI -->
160 <citations>
161 <citation type="doi">10.3390/a13090233</citation>
162 </citations>
163 </tool>