1
|
1 <tool id="aurora_wgcna" name="Aurora Galaxy WGCNA" version="1.0.0">
|
|
<!-- Short summary of what the tool does. -->
<description>Identify gene co-expression network modules using WGCNA.</description>
|
|
<!--
  Software dependencies of the wrapper. Every entry is a package requirement
  pinned to an exact version: pandoc plus the R packages listed below.
-->
<requirements>
  <requirement type="package" version="2.7.3">pandoc</requirement>
  <requirement type="package" version="1.20.3">r-getopt</requirement>
  <requirement type="package" version="1.12">r-rmarkdown</requirement>
  <requirement type="package" version="1.8.4">r-plyr</requirement>
  <requirement type="package" version="0.10">r-dt</requirement>
  <requirement type="package" version="0.4.0">r-htmltools</requirement>
  <requirement type="package" version="3.2.1">r-ggplot2</requirement>
  <requirement type="package" version="1.4.3">r-reshape2</requirement>
  <requirement type="package" version="6.0_83">r-caret</requirement>
  <requirement type="package" version="1.68">r-wgcna</requirement>
  <requirement type="package" version="0.12">r-tinytex</requirement>
</requirements>
|
|
<!--
  Error detection. Each regex is matched against both stdout and stderr
  (source="both"); a match marks the job as failed (level="fatal") and the
  description is the message reported for that failure.
-->
<stdio>
  <regex match="Execution halted"
         source="both"
         level="fatal"
         description="Execution halted." />
  <regex match="Error in"
         source="both"
         level="fatal"
         description="An undefined error occurred, please check your input carefully and contact your administrator." />
  <regex match="Fatal error"
         source="both"
         level="fatal"
         description="An undefined error occurred, please check your input carefully and contact your administrator." />
</stdio>
|
|
<!--
  Command template (Cheetah). Exports TOOL_INSTALL_DIR and runs
  aurora_wgcna_render.R through Rscript.

  * The trait options (-t, -c, -o, -y, -x) are emitted only when a trait
    matrix is supplied; -o falls back to 'NA' when no missing-value marker
    is given.
  * -h (cut height) and -p (power) are emitted only when a value is set.
  * Every interpolated value is single-quoted so that dataset paths and
    free-text parameters reach the script as exactly one argument each and
    are never re-interpreted by the shell.
-->
<command>
<![CDATA[
    export TOOL_INSTALL_DIR='${__tool_directory__}' &&

    Rscript '${__tool_directory__}/aurora_wgcna_render.R'
        -e '$expression_data'
        -s '$min_cluster_size'
        -b '$block_size'
        -j '$hard_threshold'
        #if $trait_info.trait_data
            -t '$trait_info.trait_data'
            -c '$trait_info.sname_col'
            #if $trait_info.missing_value2
                -o '$trait_info.missing_value2'
            #else
                -o 'NA'
            #end if
            #if $trait_info.one_hot_cols
                -y '$trait_info.one_hot_cols'
            #end if
            #if $trait_info.ignore_cols
                -x '$trait_info.ignore_cols'
            #end if
        #end if
        #if $height_cut
            -h '$height_cut'
        #end if
        #if $power
            -p '$power'
        #end if
        -l '$render_log_file'
        -a '$r_data'
        -k '$gene_module_file'
        -w '$network_edges_file'
        -g '$gene_association_file'
        -m '$module_association_file'
        -q '$module_association_report'
        -r '$network_construction_report'
        -z '$updated_trait_matrix'
        -d '$filtered_GEM'
        -i '$missing_value1'
]]>
</command>
|
|
<!--
  Tool form. The expression matrix and the network-construction settings are
  top-level parameters; everything related to the optional trait/phenotype
  matrix lives in the "trait_info" section.
-->
<inputs>
  <!-- Required gene expression matrix (CSV). -->
  <param
    name="expression_data"
    type="data"
    format="csv"
    optional="false"
    label="Gene expression data"
    help="The gene expression data is an n x m matrix where n rows are the genes, m columns are the samples and the elements represent gene expression levels (derived either from Microarray or RNA-Seq). The matrix should be stored in a comma-separated (CSV) file and it must have a header. The gene names must appear as the first column of data in the file."
  />
  <!-- Marker used for missing values in the expression matrix. -->
  <param
    name="missing_value1"
    type="text"
    value="NA"
    optional="false"
    label="Missing Value Identifier"
    help="Within the gene expression data some genes may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any expression level that exactly matches the value provided will be considered a missing value."
  />
  <!-- Optional: left empty by default. -->
  <param
    name="height_cut"
    type="float"
    value=""
    optional="true"
    label="Outlier Dendrogram Cut Height"
    help="When checking for outliers, WGCNA performs hierarchical clustering. The resulting dendrogram can be cut at the given height to remove outliers. If no value is provided a cut height will automatically be determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate."
  />
  <!-- Optional: left empty by default. -->
  <param
    name="power"
    type="integer"
    value=""
    optional="true"
    label="Power"
    help="Prior to network construction, WGCNA recommends that the gene expression data is raised to a power. The exact power that should be used will be automatically determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate."
  />
  <param
    name="min_cluster_size"
    type="integer"
    value="30"
    optional="false"
    label="Minimum Module Size"
    help="The minimum module size. Modules smaller than this will not be included in the network."
  />
  <!-- Restricted to the range 1000..10000. -->
  <param
    name="block_size"
    type="integer"
    value="5000"
    min="1000"
    max="10000"
    optional="false"
    label="Block Size"
    help="Constructing a network can use an extreme amount of memory if the number of genes is high. The block size enables WGCNA to divide the data into blocks of genes with similar expression reducing the amount of memory used. The block size indicates the maximum number of genes that can be used in a block. The total number of blocks used will be the total genes divided by this number (plus 1 for any remainder)."
  />
  <!-- Restricted to the range 0..1. -->
  <param
    name="hard_threshold"
    type="float"
    value="0.2"
    min="0"
    max="1"
    optional="false"
    label="Hard Threshold"
    help="While WGCNA uses a soft thresholding approach for finding modules and constructing gene similarity, when exporting the network for display as a graph a hard threshold is still required. For WGCNA, the threshold is applied to the Euclidean distance between all genes. But, there is no set prescribed method to decide on a proper hard threshold value. Set a threshold now, then you can apply filters later (such as in Cytoscape) to remove low weighted edges if desired."
  />
  <!-- Optional trait/phenotype inputs; all parameters in here are optional. -->
  <section name="trait_info" title="Trait/Phenotype" expanded="true">
    <param
      name="trait_data"
      type="data"
      format="csv"
      optional="true"
      label="Trait/Phenotype Data Matrix"
      help="The trait/phenotype data is an n x m matrix where n is the samples and m are the features such as experimental condition, biosample properties, traits or phenotype values. The matrix should be stored in a comma-separated (CSV) file. It must have a header."
    />
    <!-- Marker used for missing values in the trait matrix. -->
    <param
      name="missing_value2"
      type="text"
      value="NA"
      optional="true"
      label="Missing Value Identifier"
      help="Within the trait data some columns may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any value that exactly matches the value provided will be considered a missing value."
    />
    <param
      name="sname_col"
      type="integer"
      value="1"
      optional="true"
      label="Sample Name Column"
      help="The number of the column (starting from 1) in the sample annotation data file where the sample name column is found."
    />
    <!-- Comma-separated column names, no spaces. -->
    <param
      name="one_hot_cols"
      type="text"
      value=""
      optional="true"
      label="Categorical Columns"
      help="Categorical columns in the sample data matrix must be '1-hot encoded'. This means that each categorical column is expanded into new columns (one for each category) and the values for the new columns are set to 1 if the sample has the category and 0 if not. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores. If you do not specify categorical column names then they will be converted to factors and treated as ordinal data."
    />
    <!-- Comma-separated column names, no spaces. -->
    <param
      name="ignore_cols"
      type="text"
      value=""
      optional="true"
      label="Columns to Ignore"
      help="The names of columns in the sample data matrix that should be ignored. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores."
    />
  </section>
</inputs>
|
|
<!--
  Datasets produced by the tool.

  The four trait-association outputs are only meaningful when a trait matrix
  was supplied, so each carries a filter on that input. The trait_data
  parameter is declared inside the "trait_info" section, so the filter
  expression has to reach it through the section
  (trait_info['trait_data']); a bare trait_data name is not defined in the
  filter namespace.
-->
<outputs>
  <data
    name="network_construction_report"
    format="pdf"
    label="network_construction_report.pdf"
  />
  <data
    name="filtered_GEM"
    format="csv"
    label="filtered_GEM.csv"
  />
  <data
    name="gene_module_file"
    format="csv"
    label="gene_module_file.csv"
  />
  <data
    name="network_edges_file"
    format="tabular"
    label="network_edges.txt"
  />

  <!-- Outputs created only when a trait/phenotype matrix is provided. -->
  <data
    name="module_association_report"
    format="pdf"
    label="module_association_report.pdf">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data
    name="gene_association_file"
    format="csv"
    label="gene_association.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data
    name="module_association_file"
    format="csv"
    label="module_association.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data
    name="updated_trait_matrix"
    format="csv"
    label="updated_trait_matrix.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>

  <!-- Render log stays visible in the history; the RData dataset is hidden. -->
  <data
    name="render_log_file"
    format="txt"
    label="render_log_file"
    hidden="false"
  />
  <data
    name="r_data"
    format="rdata"
    label="aurora_wgcna.RData"
    hidden="true"
  />

  <!-- Files found in the job's "figures" directory are collected into a list. -->
  <collection name="figures" type="list" label="Figures">
    <discover_datasets
      pattern="__designation_and_ext__"
      directory="figures"
      visible="false" />
  </collection>
</outputs>
|
|
<!--
  Functional tests.
  NOTE(review): the report outputs are declared with format "pdf" in the
  outputs section but are compared against .html fixtures here; confirm
  that the fixtures match what the tool actually writes.
-->
<tests>
  <!-- Expression matrix only: network construction outputs. -->
  <test>
    <param name="expression_data" value="LiverFemale3600.gem.csv"/>
    <output name="gene_module_file" file="gene_module.csv"/>
    <output name="network_edges_file" file="network_edges.txt"/>
    <output name="network_construction_report" file="network_construction_report.html"/>
  </test>
  <!-- Expression matrix plus trait matrix: association outputs are checked as well. -->
  <test>
    <param name="expression_data" value="LiverFemale3600.gem.csv"/>
    <param name="trait_data" value="ClinicalTraits.csv"/>
    <output name="gene_module_file" file="gene_module.csv"/>
    <output name="network_edges_file" file="network_edges.txt"/>
    <output name="gene_association_file" file="gene_association_file.csv"/>
    <output name="module_association_file" file="module_association_file.csv"/>
    <output name="module_association_report" file="module_association_report.html"/>
    <output name="network_construction_report" file="network_construction_report.html"/>
  </test>
</tests>
|
|
<!-- Help text shown with the tool form. -->
<help><![CDATA[
This tool is a wrapper for the WGCNA R library. Please see the online
WGCNA tutorial for further details.
]]></help>
|
|
<!-- BibTeX references for WGCNA, rmarkdown, knitr and DT. -->
<citations>
  <citation type="bibtex">
    @article{langfelder2008wgcna,
      title={WGCNA: an R package for weighted correlation network analysis},
      author={Langfelder, Peter and Horvath, Steve},
      journal={BMC bioinformatics},
      volume={9},
      number={1},
      pages={559},
      year={2008},
      publisher={BioMed Central}
    }
  </citation>
  <citation type="bibtex">
    @article{allaire2016rmarkdown,
      title={rmarkdown: Dynamic Documents for R, 2016},
      author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
      journal={R package version 0.9},
      volume={6},
      year={2016}
    }
  </citation>
  <citation type="bibtex">
    @book{xie2015dynamic,
      title={Dynamic Documents with R and knitr},
      author={Xie, Yihui},
      volume={29},
      year={2015},
      publisher={CRC Press}
    }
  </citation>
  <citation type="bibtex">
    @misc{dt2016,
      title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
      author = {Yihui Xie},
      year = {2016},
      note = {R package version 0.2},
      url = {https://CRAN.R-project.org/package=DT},
    }
  </citation>
</citations>
|
|
307 </tool>
|