Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.xml @ 16:c71ac0bb12de draft
Uploaded
author | bimib |
---|---|
date | Tue, 01 Oct 2019 06:05:13 -0400 |
parents | 68a5f2db55b9 |
children | 640f303d0cec |
comparison
equal
deleted
inserted
replaced
15:d0e7f14b773f | 16:c71ac0bb12de |
---|---|
1 <tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0"> | 1 <tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.1"> |
2 <description>of Reaction Activity Scores</description> | 2 <description>of Reaction Activity Scores - 1.0.1</description> |
3 <macros> | 3 <macros> |
4 <import>marea_macros.xml</import> | 4 <import>marea_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
7 <requirement type="package" version="0.23.0">pandas</requirement> | 7 <requirement type="package" version="0.23.0">pandas</requirement> |
8 <requirement type="package" version="1.1.0">scipy</requirement> | 8 <requirement type="package" version="1.1.0">scipy</requirement> |
9 <requirement type="package" version="0.10.1">cobra</requirement> | 9 <requirement type="package" version="0.10.1">cobra</requirement> |
10 <requirement type="package" version="0.19.1">scikit-learn</requirement> | 10 <requirement type="package" version="0.21.3">scikit-learn</requirement> |
11 <requirement type="package" version="2.2.2">matplotlib</requirement> | 11 <requirement type="package" version="2.2.2">matplotlib</requirement> |
12 <requirement type="package" version="1.17">numpy</requirement> | |
12 </requirements> | 13 </requirements> |
13 <command detect_errors="exit_code"> | 14 <command detect_errors="exit_code"> |
14 <![CDATA[ | 15 <![CDATA[ |
15 python $__tool_directory__/marea_cluster.py | 16 python $__tool_directory__/marea_cluster.py |
16 --rules_selector $cond_rule.rules_selector | 17 --input $input |
17 #if $cond_rule.rules_selector == 'Custom': | |
18 --custom ${cond_rule.Custom_rules} | |
19 #end if | |
20 --cond_hier $cond_hier.hier | |
21 #if $cond_hier.hier == 'yes': | |
22 --linkage ${cond_hier.linkage} | |
23 --dendro $dendrogram | |
24 #end if | |
25 --k_max $k_max | |
26 --k_min $k_min | |
27 --data $input | |
28 --name $name | |
29 --none $None | |
30 --tool_dir $__tool_directory__ | 18 --tool_dir $__tool_directory__ |
31 --out_log $log | 19 --out_log $log |
32 --elbow $elbow | 20 #if $data.clust_type == 'kmeans': |
21 --k_min ${data.k_min} | |
22 --k_max ${data.k_max} | |
23 --elbow ${data.elbow} | |
24 --silhouette ${data.silhouette} | |
25 #end if | |
26 #if $data.clust_type == 'dbscan': | |
27 #if $data.dbscan_advanced.advanced == 'true' | |
28 --eps ${data.dbscan_advanced.eps} | |
29 --min_samples ${data.dbscan_advanced.min_samples} | |
30 #end if | |
31 #end if | |
32 #if $data.clust_type == 'hierarchy': | |
33 --k_min ${data.k_min} | |
34 --k_max ${data.k_max} | |
35 #end if | |
33 ]]> | 36 ]]> |
34 </command> | 37 </command> |
35 <inputs> | 38 <inputs> |
36 <conditional name="cond_rule"> | 39 <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> |
37 <expand macro="options"/> | 40 |
38 <when value="Custom"> | 41 <conditional name="data"> |
39 <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" /> | 42 <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:"> |
40 </when> | 43 <option value="kmeans" selected="true">KMeans</option> |
41 <when value="HMRcore"> | 44 <option value="dbscan">DBSCAN</option> |
42 </when> | 45 <option value="hierarchy">Agglomerative Hierarchical</option> |
43 <when value="Recon"> | 46 </param> |
44 </when> | 47 <when value="kmeans"> |
45 </conditional> | 48 <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" /> |
46 <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> | 49 <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" /> |
47 <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" /> | 50 <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/> |
48 <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/> | 51 <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/> |
49 <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/> | 52 </when> |
50 <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" /> | 53 <when value="dbscan"> |
51 <conditional name="cond_hier"> | 54 <conditional name="dbscan_advanced"> |
52 <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):"> | 55 <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)"> |
53 <option value="no" selected="true">no</option> | 56 <option value="true">Yes</option> |
54 <option value="yes">yes</option> | 57 <option value="false">No</option> |
55 </param> | 58 </param> |
56 <when value="yes"> | 59 <when value="false"></when> |
57 <param name="linkage" argument="--linkage" type="select" label="Linkage type:"> | 60 <when value="true"> |
58 <option value="single" selected="true">Single: minimum distance between all observations of two sets</option> | 61 <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" /> |
59 <option value="complete">Complete: maximum distance between all observations of two sets</option> | 62 <param name="min_samples" argument="min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/> |
60 <option value="average">Average: average distance between all observations of two sets</option> | 63 |
61 </param> | 64 </when> |
62 </when> | 65 </conditional> |
63 <when value="no"> | 66 </when> |
64 </when> | 67 <when value="hierarchy"> |
65 </conditional> | 68 <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" /> |
69 <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" /> | |
70 </when> | |
71 </conditional> | |
66 </inputs> | 72 </inputs> |
67 | 73 |
68 <outputs> | 74 <outputs> |
69 <data format="txt" name="log" label="Log" /> | 75 <data format="txt" name="log" label="${tool.name} - Log" /> |
70 <data format="pdf" name="dendrogram" label="$name dendrogram"> | 76 <collection name="results" type="list" label="${tool.name} - Results"> |
71 <filter>cond_hier['hier'] == 'yes'</filter> | 77 <discover_datasets pattern="__name_and_ext__" directory="clustering"/> |
72 </data> | |
73 <data format="pdf" name="elbow" label="$name elbow evaluation method" /> | |
74 <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max"> | |
75 <discover_datasets pattern="__name_and_ext__" directory="cluster_out" /> | |
76 </collection> | 78 </collection> |
77 </outputs> | 79 </outputs> |
78 <tests> | |
79 <test> | |
80 <param name="k_min" value="4"/> | |
81 <output name="log" file="log.txt"/> | |
82 </test> | |
83 </tests> | |
84 <help> | 80 <help> |
85 <![CDATA[ | 81 <![CDATA[ |
86 | 82 |
87 What it does | 83 What it does |
88 ------------- | 84 ------------- |
89 | |
90 This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al. "`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724. | |
91 | |
92 Accepted files are: | |
93 1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*"); | |
94 2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*"). | |
95 | |
96 Optional files: | |
97 - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: | |
98 | |
99 * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; | |
100 * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2). | |
101 - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example. | |
102 | |
103 The tool generates: | |
104 1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to; | |
105 2) Log: a log file (.txt); | |
106 3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method; | |
107 4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering). | |
108 | |
109 RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID. | |
110 | |
111 | |
112 Example input | |
113 ------------- | |
114 | |
115 **RNA-seq dataset**: | |
116 | |
117 @DATASET_EXEMPLE1@ | |
118 | |
119 **Custom Rules Dataset**: | |
120 | |
121 @CUSTOM_RULES_EXEMPLE@ | |
122 | |
123 **Custom Map**: | |
124 | |
125 *see the generated HMRcore .svg map for example* | |
126 | |
127 | |
128 | |
129 .. class:: infomark | |
130 | |
131 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_. | |
132 | |
133 .. class:: warningmark | |
134 | |
135 If the dendrogram is too densely populated, individual paths and labels may not be clearly readable. | |
136 | |
137 @REFERENCE@ | |
138 | |
139 .. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724 | |
140 .. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj | |
141 | 85 |
142 | 86 |
143 ]]> | 87 ]]> |
144 </help> | 88 </help> |
145 <expand macro="citations" /> | 89 <expand macro="citations" /> |