annotate Marea/marea_cluster.xml @ 73:11335fd2dda0 draft

Uploaded
author bimib
date Wed, 03 Jun 2020 11:31:28 -0400
parents c22ba0547370
children 500c9e788a05
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
b3f9e13bf15f Uploaded
bimib
parents: 41
diff changeset
1 <tool id="MaREA_cluester" name="Cluster Analysis" version="1.1.2">
29
9fcb0e8d6d47 Uploaded
bimib
parents: 28
diff changeset
2 <description></description>
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
3 <macros>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
4 <import>marea_macros.xml</import>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
5 </macros>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
6 <requirements>
17
640f303d0cec fix for dependencies
bimib
parents: 16
diff changeset
7 <requirement type="package" version="0.25.1">pandas</requirement>
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
8 <requirement type="package" version="1.1.0">scipy</requirement>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
9 <requirement type="package" version="0.10.1">cobra</requirement>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
10 <requirement type="package" version="0.21.3">scikit-learn</requirement>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
11 <requirement type="package" version="2.2.2">matplotlib</requirement>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
12 <requirement type="package" version="1.17">numpy</requirement>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
13 </requirements>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
14 <command detect_errors="exit_code">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
15 <![CDATA[
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
16 python $__tool_directory__/marea_cluster.py
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
17 --input $input
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
18 --tool_dir $__tool_directory__
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
19 --out_log $log
28
e6831924df01 small fixes (elbow plot and output managment)
bimib
parents: 27
diff changeset
20 --best_cluster $best_cluster
24
69ed2562e81e Uploaded
bimib
parents: 18
diff changeset
21 --cluster_type ${data.clust_type}
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
22 #if $data.clust_type == 'kmeans':
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
23 --k_min ${data.k_min}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
24 --k_max ${data.k_max}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
25 --elbow ${data.elbow}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
26 --silhouette ${data.silhouette}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
27 #end if
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
28 #if $data.clust_type == 'dbscan':
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
29 #if $data.dbscan_advanced.advanced == 'true'
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
30 --eps ${data.dbscan_advanced.eps}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
31 --min_samples ${data.dbscan_advanced.min_samples}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
32 #end if
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
33 #end if
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
34 #if $data.clust_type == 'hierarchy':
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
35 --k_min ${data.k_min}
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
36 --k_max ${data.k_max}
34
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
37 --silhouette ${data.silhouette}
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
38 #end if
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
39 ]]>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
40 </command>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
41 <inputs>
31
944e15aa970a Uploaded
bimib
parents: 29
diff changeset
42 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
43
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
44 <conditional name="data">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
45 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
46 <option value="kmeans" selected="true">KMeans</option>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
47 <option value="dbscan">DBSCAN</option>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
48 <option value="hierarchy">Agglomerative Hierarchical</option>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
49 </param>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
50 <when value="kmeans">
31
944e15aa970a Uploaded
bimib
parents: 29
diff changeset
51 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
944e15aa970a Uploaded
bimib
parents: 29
diff changeset
52 <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
53 <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
54 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
55 </when>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
56 <when value="dbscan">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
57 <conditional name="dbscan_advanced">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
58 <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
59 <option value="true">Yes</option>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
60 <option value="false">No</option>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
61 </param>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
62 <when value="false"></when>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
63 <when value="true">
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
64 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
65 <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
66
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
67 </when>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
68 </conditional>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
69 </when>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
70 <when value="hierarchy">
34
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
71 <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
72 <param name="k_max" argument="--k_max" type="integer" min="3" max="20" value="3" label="Max number of clusters (k) to be tested" />
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
73 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
74 </when>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
75 </conditional>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
76 </inputs>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
77
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
78 <outputs>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
79 <data format="txt" name="log" label="${tool.name} - Log" />
33
abf0bfe01c78 Uploaded
bimib
parents: 31
diff changeset
80 <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" />
28
e6831924df01 small fixes (elbow plot and output managment)
bimib
parents: 27
diff changeset
81 <collection name="results" type="list" label="${tool.name} - Plots and results">
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
82 <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
33
abf0bfe01c78 Uploaded
bimib
parents: 31
diff changeset
83 <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter>
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
84 </collection>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
85 </outputs>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
86 <help>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
87 <![CDATA[
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
88
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
89 What it does
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
90 -------------
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
91
34
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
92 The tool performs cluster analysis of any dataset, according to most used algorithms: K-means, agglomerative
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
93 clustering and DBSCAN (Density Based Spatial Clustering of Applications with Noise).
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
94
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
95 Accepted files are:
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
96 - Tabular files in which rows indicate different variables and columns different observations. The first row reports the observations’ labels.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
97
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
98
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
99 Example of input dataset:
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
100 -------------------------
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
101
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
102 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
103 |TCGAA62670|TCGAA62671|TCGAA62672|
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
104 +==========+==========+==========+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
105 | 0.523167 | 0.371355 | 0.925661 |
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
106 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
107 | 0.568765 | 0.765567 | 0.456789 |
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
108 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
109 | 0.876545 | 0.768933 | 0.987654 |
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
110 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
111 | 0.456788 | 0.876543 | 0.876542 |
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
112 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
113 | 0.876543 | 0.786543 | 0.897654 |
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
114 +----------+----------+----------+
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
115
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
116 .
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
117
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
118
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
119 Options:
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
120 --------
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
121
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
122 The following clustering types can be chosen:
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
123 - K-means. This option requires the number of clusters (k) to be set. Different values of k can be tested.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
124 - Agglomerative clustering. Different values of k can be set, to cut the resulting dendrogram.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
125 - DBSCAN. The DBSCAN method chooses the number of clusters based on parameters that define when a region is to be considered dense. Custom parameters may be used, namely the maximum distance between two samples for one to be considered as in the neighborhood of the other and the number of samples in a neighborhood for a point to be considered as a core point.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
126
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
127 The tool generates:
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
128 - a tab-separated file: reporting the affiliation of each observation to a cluster. In case different numbers of clusters have been tested, the best cluster assignment is reported according to maximum average silhouette score. If desired, the elbow plot is generated, as well as silhouette plot for each k.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
129 - a list of items, including: 1) the cluster assignment for each tested number of clusters 2) the dendrogram in case of agglomerative clustering 3) elbow and silhouette plots in case of k-means clustering.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
130 - a log file (.txt).
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
131
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
132
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
133 .. class:: infomark
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
134
70
c22ba0547370 update links and pubblications.
bimib
parents: 42
diff changeset
135 **TIP**: This tool has been conceived to cluster gene expression data, by using the RAS scores computed by `Ras tool`_.
34
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
136
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
137 .. class:: infomark
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
138
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
139 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
140
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
141
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
142
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
143 @REFERENCE@
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
144
70
c22ba0547370 update links and pubblications.
bimib
parents: 42
diff changeset
145 .. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
c22ba0547370 update links and pubblications.
bimib
parents: 42
diff changeset
146 .. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
34
1a97d1537623 Lot of bug fixes
bimib
parents: 33
diff changeset
147
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
148 ]]>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
149 </help>
73
11335fd2dda0 Uploaded
bimib
parents: 70
diff changeset
150 <expand macro="citations" />
16
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
151 </tool>
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
152
c71ac0bb12de Uploaded
bimib
parents: 8
diff changeset
153