<!--
    Galaxy tool wrapper for marea_cluster.py (cluster analysis).

    Runs KMeans, DBSCAN or Agglomerative Hierarchical clustering on a tabular
    input and exposes a log, a "best cluster" table and (for KMeans and
    hierarchical clustering) a collection of files discovered in the
    "clustering" working sub-directory.

    NOTE(review): the tool id keeps its historical spelling ("cluester") on
    purpose; Galaxy uses the id to resolve saved workflows and histories, so
    renaming it would break existing users.
-->
<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.7">
    <description></description>
    <macros>
        <import>marea_macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="0.25.1">pandas</requirement>
        <requirement type="package" version="1.1.0">scipy</requirement>
        <requirement type="package" version="0.10.1">cobra</requirement>
        <requirement type="package" version="0.21.3">scikit-learn</requirement>
        <requirement type="package" version="2.2.2">matplotlib</requirement>
        <requirement type="package" version="1.17">numpy</requirement>
    </requirements>
    <command detect_errors="exit_code">
        <![CDATA[
        ## Paths are single-quoted so directories containing spaces do not split arguments.
        python '$__tool_directory__/marea_cluster.py'
        --input '$input'
        --tool_dir '$__tool_directory__'
        --out_log '$log'
        --best_cluster '$best_cluster'
        --cluster_type ${data.clust_type}
        ## Only the options belonging to the selected clustering type are forwarded.
        #if $data.clust_type == 'kmeans':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
            --elbow ${data.elbow}
            --silhouette ${data.silhouette}
        #elif $data.clust_type == 'dbscan':
            ## eps / min_samples are passed only when the user asked for custom values.
            #if $data.dbscan_advanced.advanced == 'true':
                --eps ${data.dbscan_advanced.eps}
                --min_samples ${data.dbscan_advanced.min_samples}
            #end if
        #elif $data.clust_type == 'hierarchy':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
        #end if
        ]]>
    </command>
    <inputs>
        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Input dataset" />

        <!-- The selected clustering type decides which extra parameters are shown. -->
        <conditional name="data">
            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
                <option value="kmeans" selected="true">KMeans</option>
                <option value="dbscan">DBSCAN</option>
                <option value="hierarchy">Agglomerative Hierarchical</option>
            </param>
            <when value="kmeans">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
                <!-- Boolean defaults are set with "checked"; Galaxy ignores "value" on boolean params. -->
                <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
                <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
            </when>
            <when value="dbscan">
                <conditional name="dbscan_advanced">
                    <!-- A boolean test param takes no option children; its two states map to the when blocks below. -->
                    <param name="advanced" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)" />
                    <when value="false"></when>
                    <when value="true">
                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
                    </when>
                </conditional>
            </when>
            <when value="hierarchy">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data format="txt" name="log" label="${tool.name} - Log" />
        <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
        <!-- Files found in the "clustering" directory are collected only for KMeans and hierarchical runs. -->
        <collection name="results" type="list" label="${tool.name} - Plots and results">
            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
            <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
        </collection>
    </outputs>
    <help>
        <![CDATA[

What it does
-------------

Runs a cluster analysis on the input dataset with one of three methods:
KMeans, DBSCAN or Agglomerative Hierarchical clustering.

Parameters
----------

- **KMeans**: minimum and maximum number of clusters (k) to be tested, and
  whether to draw the elbow plot and the Silhouette plot from k-min to k-max.
- **DBSCAN**: optionally, custom values for Epsilon and Min samples.
- **Agglomerative Hierarchical**: minimum and maximum number of clusters (k)
  to be tested.

Outputs
-------

- A log file.
- A tabular file with the best cluster assignment.
- For KMeans and Agglomerative Hierarchical clustering, a collection of plots
  and results.

        ]]>
    </help>
    <expand macro="citations" />
</tool>