4
|
1 <tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0">
|
0
|
2 <description>of Reaction Activity Scores</description>
|
1
|
3 <macros>
|
|
4 <import>marea_macros.xml</import>
|
|
5 </macros>
|
0
|
<!-- Package dependencies for this tool, each pinned to an exact version. -->
<requirements>
    <requirement version="0.23.0" type="package">pandas</requirement>
    <requirement version="1.1.0" type="package">scipy</requirement>
    <requirement version="0.10.1" type="package">cobra</requirement>
    <requirement version="0.19.1" type="package">scikit-learn</requirement>
    <requirement version="2.2.2" type="package">matplotlib</requirement>
</requirements>
|
4
|
<!--
    Command line for marea_cluster.py.
    Dataset paths, output paths and the free-text output prefix are wrapped in
    single quotes so that values containing spaces reach the script as a single
    argument. Integer, boolean and select values are passed unquoted.
    A non-zero exit code marks the job as failed (detect_errors="exit_code").
-->
<command detect_errors="exit_code">
    <![CDATA[
        python '$__tool_directory__/marea_cluster.py'
        --rules_selector $cond_rule.rules_selector
        #if $cond_rule.rules_selector == 'Custom':
            ## The custom rules dataset only exists in the "Custom" branch of cond_rule.
            --custom '${cond_rule.Custom_rules}'
        #end if
        --cond_hier $cond_hier.hier
        #if $cond_hier.hier == 'yes':
            ## Linkage type and dendrogram output are only passed when a dendrogram is requested.
            --linkage ${cond_hier.linkage}
            --dendro '$dendrogram'
        #end if
        --k_max $k_max
        --k_min $k_min
        --data '$input'
        --name '$name'
        --none $None
        --tool_dir '$__tool_directory__'
        --out_log '$log'
        --elbow '$elbow'
    ]]>
</command>
|
|
<inputs>
    <!-- Rule set selection. The selector parameter is expected to come from the
         "options" macro (the command reads it as cond_rule.rules_selector). -->
    <conditional name="cond_rule">
        <expand macro="options"/>
        <when value="Custom">
            <param name="Custom_rules"
                   type="data"
                   format="tabular, csv, tsv, xml"
                   label="Custom rules"/>
        </when>
        <when value="HMRcore"/>
        <when value="Recon"/>
    </conditional>

    <!-- Expression dataset and the prefix used in output labels. -->
    <param name="input"
           argument="--data"
           type="data"
           format="tabular, csv, tsv"
           label="RNAseq of all samples"/>
    <param name="name"
           argument="--name"
           type="text"
           value="dataset"
           label="Output name prefix"/>

    <!-- Range of cluster counts to test; both bounds accept 2 to 30 and default to 3. -->
    <param name="k_min"
           argument="--k_min"
           type="integer"
           size="20"
           value="3"
           min="2"
           max="30"
           label="Min number of clusters (k) to be tested (k-means)"/>
    <param name="k_max"
           argument="--k_max"
           type="integer"
           size="20"
           value="3"
           min="2"
           max="30"
           label="Max number of clusters (k) to be tested (k-means)"/>

    <param name="None"
           argument="--none"
           type="boolean"
           checked="true"
           truevalue="true"
           falsevalue="false"
           label="(A and NaN) solved as (A)?"
           help="If NO is selected, (A and NaN) is solved as (NaN)"/>

    <!-- Optional hierarchical clustering; the linkage choice only appears for "yes". -->
    <conditional name="cond_hier">
        <param name="hier"
               argument="--cond_hier"
               type="select"
               label="Produce dendrogram (hierarchical clustering):">
            <option value="no" selected="true">no</option>
            <option value="yes">yes</option>
        </param>
        <when value="yes">
            <param name="linkage"
                   argument="--linkage"
                   type="select"
                   label="Linkage type:">
                <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
                <option value="complete">Complete: maximum distance between all observations of two sets</option>
                <option value="average">Average: average distance between all observations of two sets</option>
            </param>
        </when>
        <when value="no"/>
    </conditional>
</inputs>
|
1
|
67
|
0
|
<outputs>
    <!-- Plain-text log, passed to the script via the out_log option. -->
    <data name="log" format="txt" label="Log"/>

    <!-- Dendrogram PDF; only created when the user selects hier == 'yes'. -->
    <data name="dendrogram" format="pdf" label="$name dendrogram">
        <filter>cond_hier['hier'] == 'yes'</filter>
    </data>

    <!-- Elbow-method plot, passed to the script via the elbow option. -->
    <data name="elbow" format="pdf" label="$name elbow evaluation method"/>

    <!-- Files found in the cluster_out directory are collected into a list;
         element name and extension are taken from each file name. -->
    <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
        <discover_datasets directory="cluster_out" pattern="__name_and_ext__"/>
    </collection>
</outputs>
|
4
|
<tests>
    <!-- Single test: overrides k_min and compares the log output with log.txt. -->
    <!-- NOTE(review): no "input" dataset is supplied and k_min=4 exceeds the
         default k_max=3; confirm this test can actually run as written. -->
    <test>
        <param value="4" name="k_min"/>
        <output file="log.txt" name="log"/>
    </test>
</tests>
|
0
|
84 <help>
|
1
|
85 <![CDATA[
|
|
86
|
|
87 What it does
|
|
88 -------------
|
|
89
|
|
90 This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al. "`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
|
|
91
|
|
92 Accepted files are:
|
|
93 1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
|
|
94 2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*").
|
|
95
|
|
96 Optional files:
|
|
97 - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
|
|
98
|
|
99 * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
|
|
100 * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2).
|
|
101 - custom svg map. Graphical elements must have the same IDs as the reactions. See HmrCore svg map for an example.
|
0
|
102
|
1
|
103 The tool generates:
|
|
104 1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
|
|
105 2) Log: a log file (.txt);
|
|
106 3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
|
|
107 4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering).
|
|
108
|
|
109 RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
|
|
110
|
|
111
|
|
112 Example input
|
|
113 -------------
|
0
|
114
|
1
|
115 **RNA-seq dataset**:
|
|
116
|
|
117 @DATASET_EXEMPLE@
|
|
118
|
|
119 **Custom Rules Dataset**:
|
|
120
|
|
121 @CUSTOM_RULES_EXEMPLE@
|
|
122
|
|
123 **Custom Map**:
|
|
124
|
|
125 *see the generated HMRcore .svg map for example*
|
|
126
|
|
127
|
0
|
128
|
|
129 .. class:: infomark
|
|
130
|
1
|
131 **TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
|
|
132
|
|
133 .. class:: warningmark
|
|
134
|
|
135 If the dendrogram is too crowded, individual branches and labels may be hard to read.
|
|
136
|
|
137 @REFERENCE@
|
0
|
138
|
1
|
139 .. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
|
|
140 .. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
|
|
141
|
|
142
|
|
143 ]]>
|
0
|
144 </help>
|
1
|
145 <expand macro="citations" />
|
0
|
146 </tool>
|
|
147
|
|
148
|