<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper that runs aurora_wgcna_render.R (found in the tool
    directory) through Rscript, handing it the inputs declared below and the
    paths of the declared outputs as command-line options.
-->
<tool id="aurora_wgcna" name="Aurora Galaxy WGCNA" version="1.0.0">
    <description>
        Identify gene co-expression network modules using WGCNA.
    </description>

    <!-- Packages that must be available when the command runs. -->
    <requirements>
        <requirement type="package" version="2.7.3">pandoc</requirement>
        <requirement type="package" version="1.20.3">r-getopt</requirement>
        <requirement type="package" version="1.12">r-rmarkdown</requirement>
        <requirement type="package" version="1.8.4">r-plyr</requirement>
        <requirement type="package" version="0.10">r-dt</requirement>
        <requirement type="package" version="0.4.0">r-htmltools</requirement>
        <requirement type="package" version="3.2.1">r-ggplot2</requirement>
        <requirement type="package" version="1.4.3">r-reshape2</requirement>
        <requirement type="package" version="6.0_83">r-caret</requirement>
        <requirement type="package" version="1.68">r-wgcna</requirement>
        <requirement type="package" version="0.12">r-tinytex</requirement>
        <requirement type="package" version="20180414">texlive-core</requirement>
    </requirements>

    <!--
        Mark the job as failed when any of these patterns shows up on either
        stdout or stderr.
    -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>

    <!--
        Cheetah template for the shell command. Dataset paths and free-text
        parameters are single-quoted so that a value containing whitespace
        stays a single shell argument; numeric parameters are left bare.
    -->
    <command>
        <![CDATA[
            export TOOL_INSTALL_DIR='${__tool_directory__}' &&

            Rscript '${__tool_directory__}/aurora_wgcna_render.R'
                -e '$expression_data'
                -s $min_cluster_size
                -b $block_size
                -j $hard_threshold
            ## Trait options are only passed when a trait matrix was supplied.
            #if $trait_info.trait_data
                -t '$trait_info.trait_data'
                -c $trait_info.sname_col
                #if $trait_info.missing_value2
                    -o '$trait_info.missing_value2'
                #else
                    -o 'NA'
                #end if
                #if $trait_info.one_hot_cols
                    -y '$trait_info.one_hot_cols'
                #end if
                #if $trait_info.ignore_cols
                    -x '$trait_info.ignore_cols'
                #end if
            #end if
            ## Optional tuning values: omitted when left empty in the form.
            #if $height_cut
                -h $height_cut
            #end if
            #if $power
                -p $power
            #end if
                -l '$render_log_file'
                -a '$r_data'
                -k '$gene_module_file'
                -w '$network_edges_file'
                -g '$gene_association_file'
                -m '$module_association_file'
                -q '$module_association_report'
                -r '$network_construction_report'
                -z '$updated_trait_matrix'
                -d '$filtered_GEM'
                -i '$missing_value1'
        ]]>
    </command>

    <inputs>
        <param
            type="data"
            name="expression_data"
            format="csv"
            optional="false"
            label="Gene expression data"
            help="The gene expression data is an n x m matrix where n rows are the genes, m columns are the samples and the elements represent gene expression levels (derived either from Microarray or RNA-Seq). The matrix should be stored in a comma-separated (CSV) file and it must have a header. The gene names must appear as the first column of data in the file."
        />
        <param
            type="text"
            value="NA"
            name="missing_value1"
            optional="false"
            label="Missing Value Identifier"
            help="Within the gene expression data some genes may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any expression level that exactly matches the value provided will be considered a missing value."
        />
        <param
            type="float"
            value=""
            name="height_cut"
            optional="true"
            label="Outlier Dendrogram Cut Height"
            help="When checking for outliers, WGCNA performs hierarchical clustering. The resulting dendrogram can be cut at the given height to remove outliers. If no value is provided a cut height will automatically be determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate."
        />
        <param
            type="integer"
            value=""
            name="power"
            optional="true"
            label="Power"
            help="Prior to network construction, WGCNA recommends that the gene expression data is raised to a power. The exact power that should be used will be automatically determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate."
        />
        <param
            type="integer"
            value="30"
            name="min_cluster_size"
            optional="false"
            label="Minimum Module Size"
            help="The minimum module size. Modules smaller than this will not be included in the network."
        />
        <param
            type="integer"
            value="5000"
            min="1000"
            max="10000"
            name="block_size"
            optional="false"
            label="Block Size"
            help="Constructing a network can use an extreme amount of memory if the number of genes is high. The block size enables WGCNA to divide the data into blocks of genes with similar expression reducing the amount of memory used. The block size indicates the maximum number of genes that can be used in a block. The total number of blocks used will be the total genes divided by this number (plus 1 for any remainder)."
        />
        <param
            type="float"
            value="0.2"
            name="hard_threshold"
            optional="false"
            label="Hard Threshold"
            min="0"
            max="1"
            help="While WGCNA uses a soft thresholding approach for finding modules and constructing gene similarity, when exporting the network for display as a graph a hard threshold is still required. For WGCNA, the threshold is applied to the Euclidean distance between all genes. But, there is no set prescribed method to decide on a proper hard threshold value. Set a threshold now, then you can apply filters later (such as in Cytoscape) to remove low weighted edges if desired."
        />
        <!-- Optional trait inputs; they are nested under the trait_info section. -->
        <section name="trait_info" title="Trait/Phenotype" expanded="true">
            <param
                type="data"
                name="trait_data"
                format="csv"
                optional="true"
                label="Trait/Phenotype Data Matrix"
                help="The trait/phenotype data is an n x m matrix where n is the samples and m are the features such as experimental condition, biosample properties, traits or phenotype values. The matrix should be stored in a comma-separated (CSV) file. It must have a header."
            />
            <param
                type="text"
                value="NA"
                name="missing_value2"
                optional="true"
                label="Missing Value Identifier"
                help="Within the trait data some columns may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any value that exactly matches the value provided will be considered a missing value."
            />
            <param
                type="integer"
                value="1"
                name="sname_col"
                optional="true"
                label="Sample Name Column"
                help="The number of the column (starting from 1) in the sample annotation data file where the sample name column is found."
            />
            <param
                type="text"
                value=""
                name="one_hot_cols"
                optional="true"
                label="Categorical Columns"
                help="Categorical columns in the sample data matrix must be '1-hot encoded'. This means that each categorical column is expanded into new columns (one for each category) and the values for the new columns are set to 1 if the sample has the category and 0 if not. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores. If you do not specify categorical column names then they will be converted to factors and treated as ordinal data."
            />
            <param
                type="text"
                value=""
                name="ignore_cols"
                optional="true"
                label="Columns to Ignore"
                help="The names of columns in the sample data matrix that should be ignored. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores."
            />
        </section>
    </inputs>

    <!--
        Outputs carrying a filter are only meant to exist when a trait matrix
        was supplied. trait_data is declared inside the trait_info section, so
        the filter expression has to look it up through that section.
        NOTE(review): the command still passes the -g/-m/-q/-z options when no
        trait matrix is given; confirm the R script ignores them in that case.
    -->
    <outputs>
        <data
            name="network_construction_report"
            format="pdf"
            label="network_construction_report.pdf"
        />
        <data
            name="filtered_GEM"
            format="csv"
            label="filtered_GEM.csv">
        </data>
        <data
            name="gene_module_file"
            format="csv"
            label="gene_module_file.csv"
        />
        <data
            name="network_edges_file"
            format="tabular"
            label="network_edges.txt"
        />
        <data
            name="module_association_report"
            format="pdf"
            label="module_association_report.pdf">
            <filter>trait_info['trait_data'] is not None</filter>
        </data>
        <data
            name="gene_association_file"
            format="csv"
            label="gene_association.csv">
            <filter>trait_info['trait_data'] is not None</filter>
        </data>
        <data
            name="module_association_file"
            format="csv"
            label="module_association.csv">
            <filter>trait_info['trait_data'] is not None</filter>
        </data>
        <data
            name="updated_trait_matrix"
            format="csv"
            label="updated_trait_matrix.csv">
            <filter>trait_info['trait_data'] is not None</filter>
        </data>
        <data
            name="render_log_file"
            format="txt"
            label="render_log_file"
            hidden="false"
        />
        <data
            name="r_data"
            format="rdata"
            label="aurora_wgcna.RData"
            hidden="true"
        />
        <!-- Files found in the job's "figures" directory are collected into a list. -->
        <collection name="figures" type="list" label="Figures">
            <discover_datasets
                pattern="__designation_and_ext__"
                directory="figures"
                visible="false" />
        </collection>
    </outputs>

    <!--
        NOTE(review): both report outputs are declared with format="pdf" above
        but are compared against .html files here; confirm the expected test
        files match what the tool actually writes.
    -->
    <tests>
        <!-- Expression data only. -->
        <test>
            <param name='expression_data' value="LiverFemale3600.gem.csv"/>
            <output name="gene_module_file" file="gene_module.csv"/>
            <output name="network_edges_file" file="network_edges.txt"/>
            <output name="network_construction_report" file="network_construction_report.html"/>
        </test>
        <!-- Expression data together with a trait matrix. -->
        <test>
            <param name='expression_data' value="LiverFemale3600.gem.csv"/>
            <param name='trait_data' value="ClinicalTraits.csv"/>
            <output name="gene_module_file" file="gene_module.csv"/>
            <output name="network_edges_file" file="network_edges.txt"/>
            <output name="gene_association_file" file="gene_association_file.csv"/>
            <output name="module_association_file" file="module_association_file.csv"/>
            <output name="module_association_report" file="module_association_report.html"/>
            <output name="network_construction_report" file="network_construction_report.html"/>
        </test>
    </tests>

    <help><![CDATA[
This tool is a wrapper for the WGCNA R library. Please see the online
WGCNA tutorial for further details.
]]>
    </help>

    <citations>
        <citation type="bibtex">
            @article{langfelder2008wgcna,
            title={WGCNA: an R package for weighted correlation network analysis},
            author={Langfelder, Peter and Horvath, Steve},
            journal={BMC bioinformatics},
            volume={9},
            number={1},
            pages={559},
            year={2008},
            publisher={BioMed Central}
            }
        </citation>
        <citation type="bibtex">
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        </citation>
        <citation type="bibtex">
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        </citation>
        <citation type="bibtex">
            @misc{dt2016,
            title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
            author = {Yihui Xie},
            year = {2016},
            note = {R package version 0.2},
            url = {https://CRAN.R-project.org/package=DT},
            }
        </citation>
    </citations>
</tool>