annotate marea_cluster.xml @ 289:f7812d713af5 draft default tip

Uploaded
author luca_milaz
date Tue, 09 Jul 2024 22:45:02 +0000
parents 813439d60f85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
283
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
1 <tool id="MaREA_cluester" name="Cluster Analysis" version="1.1.2">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
2 <description></description>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
3 <macros>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
4 <import>marea_macros.xml</import>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
5 </macros>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
6 <requirements>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
7 <requirement type="package" version="1.18.5">numpy</requirement>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
8 <requirement type="package" version="1.4.4">pandas</requirement>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
9 <requirement type="package" version="1.6.3">scipy</requirement>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
10 <requirement type="package" version="0.24.2">scikit-learn</requirement>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
11 <requirement type="package" version="3.4.2">matplotlib</requirement>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
12 </requirements>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
13 <command detect_errors="exit_code">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
14 <![CDATA[
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
15 python $__tool_directory__/marea_cluster.py
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
16 --input $input
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
17 --tool_dir $__tool_directory__
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
18 --out_log $log
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
19 --best_cluster $best_cluster
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
20 --cluster_type ${data.clust_type}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
21 #if $data.clust_type == 'kmeans':
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
22 --k_min ${data.k_min}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
23 --k_max ${data.k_max}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
24 --elbow ${data.elbow}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
25 --silhouette ${data.silhouette}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
26 #end if
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
27 #if $data.clust_type == 'dbscan':
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
28 #if $data.dbscan_advanced.advanced == 'true'
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
29 --eps ${data.dbscan_advanced.eps}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
30 --min_samples ${data.dbscan_advanced.min_samples}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
31 #end if
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
32 #end if
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
33 #if $data.clust_type == 'hierarchy':
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
34 --k_min ${data.k_min}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
35 --k_max ${data.k_max}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
36 --silhouette ${data.silhouette}
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
37 #end if
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
38 ]]>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
39 </command>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
40 <inputs>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
41 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
42
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
43 <conditional name="data">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
44 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
45 <option value="kmeans" selected="true">KMeans</option>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
46 <option value="dbscan">DBSCAN</option>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
47 <option value="hierarchy">Agglomerative Hierarchical</option>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
48 </param>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
49 <when value="kmeans">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
50 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
51 <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
52 <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
53 <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
54 </when>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
55 <when value="dbscan">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
56 <conditional name="dbscan_advanced">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
57 <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
58 <option value="true">Yes</option>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
59 <option value="false">No</option>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
60 </param>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
61 <when value="false"></when>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
62 <when value="true">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
63 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
64 <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
65
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
66 </when>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
67 </conditional>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
68 </when>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
69 <when value="hierarchy">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
70 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
71 <param name="k_max" argument="--k_max" type="integer" min="3" max="20" value="3" label="Max number of clusters (k) to be tested" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
72 <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
73 </when>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
74 </conditional>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
75 </inputs>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
76
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
77 <outputs>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
78 <data format="txt" name="log" label="${tool.name} - Log" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
79 <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
80 <collection name="results" type="list" label="${tool.name} - Plots and results">
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
81 <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
82 <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
83 </collection>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
84 </outputs>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
85 <help>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
86 <![CDATA[
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
87
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
88 What it does
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
89 -------------
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
90
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
91 The tool performs cluster analysis of any dataset, using the most commonly used algorithms: K-means, agglomerative
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
92 clustering and DBSCAN (Density Based Spatial Clustering of Applications with Noise).
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
93
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
94 Accepted files are:
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
95 - Tabular files in which rows indicate different variables and columns different observations. The first row reports the observations’ labels.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
96
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
97
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
98 Example of input dataset:
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
99 -------------------------
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
100
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
101 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
102 |TCGAA62670|TCGAA62671|TCGAA62672|
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
103 +==========+==========+==========+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
104 | 0.523167 | 0.371355 | 0.925661 |
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
105 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
106 | 0.568765 | 0.765567 | 0.456789 |
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
107 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
108 | 0.876545 | 0.768933 | 0.987654 |
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
109 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
110 | 0.456788 | 0.876543 | 0.876542 |
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
111 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
112 | 0.876543 | 0.786543 | 0.897654 |
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
113 +----------+----------+----------+
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
114
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
115 .
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
116
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
117
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
118 Options:
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
119 --------
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
120
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
121 The following clustering types can be chosen:
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
122 - K-means. This option requires the number of clusters (k) to be set. Different values of k can be tested.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
123 - Agglomerative clustering. Different values of k can be set, to cut the resulting dendrogram.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
124 - DBSCAN. The DBSCAN method chooses the number of clusters based on parameters that define when a region is to be considered dense. Custom parameters may be used, namely the maximum distance between two samples for one to be considered as in the neighborhood of the other and the number of samples in a neighborhood for a point to be considered as a core point.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
125
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
126 The tool generates:
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
127 - a tab-separated file reporting the cluster to which each observation is assigned. In case different numbers of clusters have been tested, the best cluster assignment is reported according to the maximum average silhouette score. If desired, the elbow plot is generated, as well as a silhouette plot for each k.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
128 - a list of items, including: 1) the cluster assignment for each tested number of clusters 2) the dendrogram in case of agglomerative clustering 3) elbow and silhouette plots in case of k-means clustering.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
129 - a log file (.txt).
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
130
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
131
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
132 .. class:: infomark
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
133
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
134 **TIP**: This tool has been conceived to cluster gene expression data, by using the RAS scores computed by `Ras tool`_.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
135
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
136 .. class:: infomark
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
137
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
138 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
139
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
140 @REFERENCE@
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
141
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
142 .. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
143 .. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
144
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
145 ]]>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
146 </help>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
147 <expand macro="citations" />
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
148 </tool>
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
149
813439d60f85 Uploaded
luca_milaz
parents:
diff changeset
150