annotate utils/marea_2_0_0/marea_cluster.xml @ 289:f7812d713af5 draft default tip

Uploaded
author luca_milaz
date Tue, 09 Jul 2024 22:45:02 +0000
parents 8842083704ee
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
286
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
1 <tool id="MaREA_cluester" name="Cluster Analysis" version="1.1.2">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
2 <description></description>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
3 <macros>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
4 <import>marea_macros.xml</import>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
5 </macros>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
6 <requirements>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
7 <requirement type="package" version="1.18.5">numpy</requirement>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
8 <requirement type="package" version="1.4.4">pandas</requirement>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
9 <requirement type="package" version="1.6.3">scipy</requirement>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
10 <requirement type="package" version="0.24.2">scikit-learn</requirement>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
11 <requirement type="package" version="3.4.2">matplotlib</requirement>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
12 </requirements>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
13 <command detect_errors="exit_code">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
14 <![CDATA[
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
15 python $__tool_directory__/marea_cluster.py
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
16 --input $input
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
17 --tool_dir $__tool_directory__
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
18 --out_log $log
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
19 --best_cluster $best_cluster
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
20 --cluster_type ${data.clust_type}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
21 #if $data.clust_type == 'kmeans':
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
22 --k_min ${data.k_min}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
23 --k_max ${data.k_max}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
24 --elbow ${data.elbow}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
25 --silhouette ${data.silhouette}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
26 #end if
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
27 #if $data.clust_type == 'dbscan':
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
28 #if $data.dbscan_advanced.advanced == 'true'
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
29 --eps ${data.dbscan_advanced.eps}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
30 --min_samples ${data.dbscan_advanced.min_samples}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
31 #end if
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
32 #end if
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
33 #if $data.clust_type == 'hierarchy':
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
34 --k_min ${data.k_min}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
35 --k_max ${data.k_max}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
36 --silhouette ${data.silhouette}
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
37 #end if
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
38 ]]>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
39 </command>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
40 <inputs>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
41 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
42
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
43 <conditional name="data">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
44 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
45 <option value="kmeans" selected="true">KMeans</option>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
46 <option value="dbscan">DBSCAN</option>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
47 <option value="hierarchy">Agglomerative Hierarchical</option>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
48 </param>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
49 <when value="kmeans">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
50 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
51 <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
52 <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
53 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
54 </when>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
55 <when value="dbscan">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
56 <conditional name="dbscan_advanced">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
57 <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
58 <option value="true">Yes</option>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
59 <option value="false">No</option>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
60 </param>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
61 <when value="false"></when>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
62 <when value="true">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
63 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
64 <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/> <!-- argument matches the --min_samples flag passed in the command template -->
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
65
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
66 </when>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
67 </conditional>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
68 </when>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
69 <when value="hierarchy">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
70 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
71 <param name="k_max" argument="--k_max" type="integer" min="3" max="20" value="3" label="Max number of clusters (k) to be tested" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
72 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
73 </when>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
74 </conditional>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
75 </inputs>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
76
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
77 <outputs>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
78 <data format="txt" name="log" label="${tool.name} - Log" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
79 <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
80 <collection name="results" type="list" label="${tool.name} - Plots and results">
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
81 <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
82 <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
83 </collection>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
84 </outputs>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
85 <help>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
86 <![CDATA[
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
87
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
88 What it does
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
89 -------------
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
90
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
91 The tool performs cluster analysis of any dataset, using the most widely used algorithms: K-means, agglomerative
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
92 clustering and DBSCAN (Density Based Spatial Clustering of Applications with Noise).
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
93
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
94 Accepted files are:
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
95 - Tabular files in which rows indicate different variables and columns different observations. The first row reports the observations’ labels.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
96
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
97
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
98 Example of input dataset:
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
99 -------------------------
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
100
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
101 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
102 |TCGAA62670|TCGAA62671|TCGAA62672|
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
103 +==========+==========+==========+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
104 | 0.523167 | 0.371355 | 0.925661 |
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
105 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
106 | 0.568765 | 0.765567 | 0.456789 |
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
107 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
108 | 0.876545 | 0.768933 | 0.987654 |
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
109 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
110 | 0.456788 | 0.876543 | 0.876542 |
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
111 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
112 | 0.876543 | 0.786543 | 0.897654 |
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
113 +----------+----------+----------+
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
114
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
115 .
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
116
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
117
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
118 Options:
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
119 --------
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
120
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
121 The following clustering types can be chosen:
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
122 - K-means. This option requires the number of clusters (k) to be set. Different values of k can be tested.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
123 - Agglomerative clustering. Different values of k can be set, to cut the resulting dendrogram.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
124 - DBSCAN. The DBSCAN method chooses the number of clusters based on parameters that define when a region is to be considered dense. Custom parameters may be used, namely the maximum distance between two samples for one to be considered as in the neighborhood of the other and the number of samples in a neighborhood for a point to be considered as a core point.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
125
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
126 The tool generates:
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
127 - a tab-separated file: reporting the affiliation of each observation to a cluster. In case different numbers of clusters have been tested, the best cluster assignment is reported according to maximum average silhouette score. If desired, the elbow plot is generated, as well as silhouette plot for each k.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
128 - a list of items, including: 1) the cluster assignment for each tested number of clusters 2) the dendrogram in case of agglomerative clustering 3) elbow and silhouette plots in case of k-means clustering.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
129 - a log file (.txt).
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
130
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
131
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
132 .. class:: infomark
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
133
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
134 **TIP**: This tool has been conceived to cluster gene expression data, by using the RAS scores computed by `Ras tool`_.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
135
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
136 .. class:: infomark
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
137
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
138 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
139
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
140 @REFERENCE@
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
141
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
142 .. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
143 .. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
144
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
145 ]]>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
146 </help>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
147 <expand macro="citations" />
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
148 </tool>
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
149
8842083704ee Uploaded
luca_milaz
parents:
diff changeset
150