annotate COBRAxy/src/marea_cluster.xml @ 552:0b3e3678ea95 draft default tip

Uploaded
author francesco_lapi
date Thu, 11 Dec 2025 13:30:36 +0000
parents 2fb97466e404
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
1 <tool id="MaREAcluster" name="Cluster Analysis" version="2.0.0">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
2 <description></description>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
3 <macros>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
4 <import>marea_macros.xml</import>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
5 </macros>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
6 <requirements>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
7 <requirement type="package" version="1.24.4">numpy</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
8 <requirement type="package" version="2.0.3">pandas</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
9 <requirement type="package" version="1.11">scipy</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
10 <requirement type="package" version="1.3.2">scikit-learn</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
11 <requirement type="package" version="3.7.3">matplotlib</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
12 <requirement type="package" version="5.2.2">lxml</requirement>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
13 </requirements>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
14 <!-- Builds the marea_cluster.py call. Path-valued substitutions are single-quoted so paths containing spaces or shell metacharacters reach the script intact. --> <command detect_errors="exit_code">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
15 <![CDATA[
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
16 python '$__tool_directory__/marea_cluster.py'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
17 --input '$input'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
18 --tool_dir '$__tool_directory__'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
19 --out_log '$log'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
20 --best_cluster '$best_cluster'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
21 --cluster_type ${data.clust_type}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
22 --scaling $scaling
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
23 #if $data.clust_type == 'kmeans':
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
24 --k_min ${data.k_min}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
25 --k_max ${data.k_max}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
26 --elbow ${data.elbow}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
27 --silhouette ${data.silhouette}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
28 #end if
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
29 #if $data.clust_type == 'dbscan':
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
30 #if $data.dbscan_advanced.advanced == 'true'
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
31 --eps ${data.dbscan_advanced.eps}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
32 --min_samples ${data.dbscan_advanced.min_samples}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
33 #end if
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
34 #end if
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
35 #if $data.clust_type == 'hierarchy':
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
36 --k_min ${data.k_min}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
37 --k_max ${data.k_max}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
38 --silhouette ${data.silhouette}
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
39 #end if
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
40 ]]>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
41 </command>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
42 <inputs>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
43 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
44 <!-- Boolean defaults are declared with "checked"; scaling is on by default. --> <param name="scaling" argument="--scaling" type="boolean" checked="true" label="Apply scaling to the dataset before clustering" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
45
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
46 <conditional name="data">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
47 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
48 <option value="kmeans" selected="true">KMeans</option>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
49 <option value="dbscan">DBSCAN</option>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
50 <option value="hierarchy">Agglomerative Hierarchical</option>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
51 </param>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
52 <!-- K-means options: range of k to test plus optional diagnostic plots (both enabled by default via "checked"). --> <when value="kmeans">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
53 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
54 <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
55 <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
56 <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
57 </when>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
58 <!-- DBSCAN options: custom eps/min_samples are only exposed (and only passed to the script) when "advanced" is checked. --> <when value="dbscan">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
59 <conditional name="dbscan_advanced">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
60 <!-- Boolean test parameter; option children belong to select parameters, so none are declared here. --> <param name="advanced" type="boolean" checked="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
64 <when value="false"></when>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
65 <when value="true">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
66 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
67 <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
68
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
69 </when>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
70 </conditional>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
71 </when>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
72 <!-- Agglomerative clustering options: range of k used to cut the dendrogram plus optional silhouette plot (enabled by default via "checked"). --> <when value="hierarchy">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
73 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
74 <param name="k_max" argument="--k_max" type="integer" min="3" max="20" value="3" label="Max number of clusters (k) to be tested" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
75 <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
76 </when>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
77 </conditional>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
78 </inputs>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
79
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
80 <outputs>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
81 <data format="txt" name="log" label="${tool.name} - Log" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
82 <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
83 <collection name="clustering" type="list" label="${tool.name} - Plots and results">
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
84 <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
85 <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
86 </collection>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
87 </outputs>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
88 <help>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
89 <![CDATA[
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
90
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
91 What it does
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
92 -------------
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
93
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
94 The tool performs cluster analysis of any dataset, using the most widely used algorithms: K-means, agglomerative
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
95 clustering and DBSCAN (Density Based Spatial Clustering of Applications with Noise).
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
96
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
97 Accepted files are:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
98 - Tabular files in which rows indicate different variables and columns different observations. The first row reports the observations’ labels.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
99
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
100
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
101 Example of input dataset:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
102 -------------------------
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
103
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
104 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
105 |TCGAA62670|TCGAA62671|TCGAA62672|
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
106 +==========+==========+==========+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
107 | 0.523167 | 0.371355 | 0.925661 |
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
108 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
109 | 0.568765 | 0.765567 | 0.456789 |
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
110 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
111 | 0.876545 | 0.768933 | 0.987654 |
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
112 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
113 | 0.456788 | 0.876543 | 0.876542 |
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
114 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
115 | 0.876543 | 0.786543 | 0.897654 |
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
116 +----------+----------+----------+
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
117
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
118 .
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
119
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
120
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
121 Options:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
122 --------
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
123
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
124 The following clustering types can be chosen:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
125 - K-means. This option requires the number of clusters (k) to be set. Different values of k can be tested.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
126 - Agglomerative clustering. Different values of k can be set, to cut the resulting dendrogram.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
127 - DBSCAN. The DBSCAN method chooses the number of clusters based on parameters that define when a region is to be considered dense. Custom parameters may be used, namely the maximum distance between two samples for one to be considered as in the neighborhood of the other and the number of samples in a neighborhood for a point to be considered as a core point.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
128
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
129 The tool generates:
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
130 - a tab-separated file reporting the assignment of each observation to a cluster. If different numbers of clusters have been tested, the best cluster assignment is reported according to the maximum average silhouette score. If desired, the elbow plot is generated, as well as a silhouette plot for each k.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
131 - a list of items, including: 1) the cluster assignment for each tested number of clusters; 2) the dendrogram, in case of agglomerative clustering; 3) elbow and silhouette plots, in case of k-means clustering.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
132 - a log file (.txt).
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
133
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
134
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
135 .. class:: infomark
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
136
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
137 **TIP**: This tool has been conceived to cluster gene expression data, by using the RAS scores computed by `Ras tool`_.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
138
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
139 .. class:: infomark
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
140
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
141 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
142
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
143 @REFERENCE@
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
144
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
145 .. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
146 .. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
147
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
148 ]]>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
149 </help>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
150 <expand macro="citations" />
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
151 </tool>
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
152
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
153