comparison aurora_wgcna.xml @ 1:c18d0db68d51 draft

Uploaded
author spficklin
date Fri, 22 Nov 2019 19:45:05 -0500
parents
children 1915371bea02
comparison
equal deleted inserted replaced
0:66ef158fa85c 1:c18d0db68d51
1 <tool id="aurora_wgcna" name="Aurora Galaxy WGCNA" version="1.0.0">
<!-- Short summary of what the tool does. -->
<description>Identify gene co-expression network modules using WGCNA.</description>
<!-- Package dependencies of the wrapper, each pinned to an exact version. -->
<requirements>
  <!-- Document converter -->
  <requirement type="package" version="2.7.3">pandoc</requirement>
  <!-- R packages -->
  <requirement type="package" version="1.20.3">r-getopt</requirement>
  <requirement type="package" version="1.12">r-rmarkdown</requirement>
  <requirement type="package" version="1.8.4">r-plyr</requirement>
  <requirement type="package" version="0.10">r-dt</requirement>
  <requirement type="package" version="0.4.0">r-htmltools</requirement>
  <requirement type="package" version="3.2.1">r-ggplot2</requirement>
  <requirement type="package" version="1.4.3">r-reshape2</requirement>
  <requirement type="package" version="6.0_83">r-caret</requirement>
  <requirement type="package" version="1.68">r-wgcna</requirement>
</requirements>
<!--
  Error detection: each pattern is searched for in both stdout and stderr
  (source="both"); a match marks the job as fatally failed and shows the
  description to the user.
-->
<stdio>
  <regex match="Execution halted"
         source="both"
         level="fatal"
         description="Execution halted." />
  <regex match="Error in"
         source="both"
         level="fatal"
         description="An undefined error occurred, please check your input carefully and contact your administrator." />
  <regex match="Fatal error"
         source="both"
         level="fatal"
         description="An undefined error occurred, please check your input carefully and contact your administrator." />
</stdio>
<!--
  Runs aurora_wgcna_render.R from the tool directory.

  Every interpolated value is wrapped in single quotes so that dataset paths
  and user-supplied text (missing value identifiers, column name lists) reach
  Rscript as exactly one argument each, even when they contain whitespace or
  shell metacharacters.

  The trait options (t, c, o, y, x) are only passed when a trait matrix was
  supplied; the h and p options are only passed when the user set a value.
-->
<command>
<![CDATA[
export TOOL_INSTALL_DIR='${__tool_directory__}' &&

Rscript '${__tool_directory__}/aurora_wgcna_render.R'
    -e '$expression_data'
    -s '$min_cluster_size'
    -b '$block_size'
    -j '$hard_threshold'
    #if $trait_info.trait_data
        -t '$trait_info.trait_data'
        -c '$trait_info.sname_col'
        ## Fall back to "NA" when the trait missing value identifier is left empty.
        #if $trait_info.missing_value2
            -o '$trait_info.missing_value2'
        #else
            -o 'NA'
        #end if
        #if $trait_info.one_hot_cols
            -y '$trait_info.one_hot_cols'
        #end if
        #if $trait_info.ignore_cols
            -x '$trait_info.ignore_cols'
        #end if
    #end if
    #if $height_cut
        -h '$height_cut'
    #end if
    #if $power
        -p '$power'
    #end if
    -l '$render_log_file'
    -a '$r_data'
    -k '$gene_module_file'
    -w '$network_edges_file'
    -g '$gene_association_file'
    -m '$module_association_file'
    -q '$module_association_report'
    -r '$network_construction_report'
    -z '$updated_trait_matrix'
    -d '$filtered_GEM'
    -i '$missing_value1'
]]>
</command>
74 <inputs>
<!-- Required CSV dataset holding the gene expression matrix (passed to the script as -e). -->
<param name="expression_data"
       type="data"
       format="csv"
       optional="false"
       label="Gene expression data"
       help="The gene expression data is an n x m matrix where n rows are the genes, m columns are the samples and the elements represent gene expression levels (derived either from Microarray or RNA-Seq). The matrix should be stored in a comma-separated (CSV) file and it must have a header. The gene names must appear as the first column of data in the file." />
<!-- Required text marking missing values in the expression matrix; defaults to "NA" (passed as -i). -->
<param name="missing_value1"
       type="text"
       value="NA"
       optional="false"
       label="Missing Value Identifier"
       help="Within the gene expression data some genes may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any expression level that exactly matches the value provided will be considered a missing value." />
<!-- Optional float with no default; the command only passes -h when a value is set. -->
<param name="height_cut"
       type="float"
       value=""
       optional="true"
       label="Outlier Dendrogram Cut Height"
       help="When checking for outliers, WGCNA performs hierarchical clustering. The resulting dendrogram can be cut at the given height to remove outliers. If no value is provided a cut height will automatically be determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate." />
<!-- Optional integer with no default; the command only passes -p when a value is set. -->
<param name="power"
       type="integer"
       value=""
       optional="true"
       label="Power"
       help="Prior to network construction, WGCNA recommends that the gene expression data is raised to a power. The exact power that should be used will be automatically determined. Try running this tool first without providing a value. Return and set a value if the results are not adequate." />
<!-- Required integer, default 30 (passed as -s). -->
<param name="min_cluster_size"
       type="integer"
       value="30"
       optional="false"
       label="Minimum Module Size"
       help="The minimum module size. Modules smaller than this will not be included in the network." />
<!-- Required integer between 1000 and 10000, default 5000 (passed as -b). -->
<param name="block_size"
       type="integer"
       value="5000"
       min="1000"
       max="10000"
       optional="false"
       label="Block Size"
       help="Constructing a network can use an extreme amount of memory if the number of genes is high. The block size enables WGCNA to divide the data into blocks of genes with similar expression reducing the amount of memory used. The block size indicates the maximum number of genes that can be used in a block. The total number of blocks used will be the total genes divided by this number (plus 1 for any remainder)." />
<!-- Required float between 0 and 1, default 0.5 (passed as -j). -->
<param name="hard_threshold"
       type="float"
       value="0.5"
       min="0"
       max="1"
       optional="false"
       label="Hard Threshold"
       help="While WGCNA uses a soft thresholding approach for finding modules and constructing gene similarity, when exporting the network for display as a graph a hard threshold is still required. For WGCNA, the threshold is applied to the Euclidean distance between all genes. But, there is no set prescribed method to decide on a proper hard threshold value. Set a threshold now, then you can apply filters later (such as in Cytoscape) to remove low weighted edges if desired." />
<!--
  Optional trait/phenotype inputs. The command template only passes these
  options to the script when trait_data has been set.
-->
<section name="trait_info" title="Trait/Phenotype" expanded="true">
  <!-- Optional CSV dataset with the trait matrix (passed as -t). -->
  <param name="trait_data"
         type="data"
         format="csv"
         optional="true"
         label="Trait/Phenotype Data Matrix"
         help="The trait/phenotype data is an n x m matrix where n is the samples and m are the features such as experimental condition, biosample properties, traits or phenotype values. The matrix should be stored in a comma-separated (CSV) file. It must have a header." />
  <!-- Text marking missing values in the trait matrix; the command substitutes "NA" when empty (passed as -o). -->
  <param name="missing_value2"
         type="text"
         value="NA"
         optional="true"
         label="Missing Value Identifier"
         help="Within the trait data some columns may have missing values. If so, please indicate the text that is used to identify a missing value. Some common examples include: NA, 0.0, 0, -Inf. Any value that exactly matches the value provided will be considered a missing value." />
  <!-- 1-based column index of the sample names, hence the lower bound of 1 (passed as -c). -->
  <param name="sname_col"
         type="integer"
         value="1"
         min="1"
         optional="true"
         label="Sample Name Column"
         help="The number of the column (starting from 1) in the sample annotation data file where the sample name column is found." />
  <!-- Comma-separated column names to one-hot encode; only passed (as -y) when non-empty. -->
  <param name="one_hot_cols"
         type="text"
         value=""
         optional="true"
         label="Categorical Columns"
         help="Categorical columns in the sample data matrix must be '1-hot encoded'. This means that each categorical column is expanded into new columns (one for each category) and the values for the new columns are set to 1 if the sample has the category and 0 if not. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores. If you do not specify categorical column names then they will be converted to factors and treated as ordinal data." />
  <!-- Comma-separated column names to skip; only passed (as -x) when non-empty. -->
  <param name="ignore_cols"
         type="text"
         value=""
         optional="true"
         label="Columns to Ignore"
         help="The names of columns in the sample data matrix that should be ignored. This value should be a comma-separated list of column names with no spaces between column names. Please make sure the column headers only have alphanumeric characters and underscores." />
</section>
177 </inputs>
<!--
  Output datasets. Each name matches the variable of the same name used in
  the command template.

  The trait-dependent outputs are filtered on the trait matrix input. That
  parameter is declared inside the "trait_info" section, so the filter has to
  address it as trait_info['trait_data']; a bare "trait_data" is not a defined
  name in the filter expression.
-->
<outputs>
  <!-- Always produced -->
  <data name="network_construction_report"
        format="pdf"
        label="network_construction_report.pdf" />
  <data name="filtered_GEM"
        format="csv"
        label="filtered_GEM.csv" />
  <data name="gene_module_file"
        format="csv"
        label="gene_module_file.csv" />
  <data name="network_edges_file"
        format="tabular"
        label="network_edges.txt" />

  <!-- Produced only when a trait/phenotype matrix was supplied -->
  <data name="module_association_report"
        format="pdf"
        label="module_association_report.pdf">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data name="gene_association_file"
        format="csv"
        label="gene_association.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data name="module_association_file"
        format="csv"
        label="module_association.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>
  <data name="updated_trait_matrix"
        format="csv"
        label="updated_trait_matrix.csv">
    <filter>trait_info['trait_data'] is not None</filter>
  </data>

  <!-- Render log (visible) and R workspace image (hidden) -->
  <data name="render_log_file"
        format="txt"
        label="render_log_file"
        hidden="false" />
  <data name="r_data"
        format="rdata"
        label="aurora_wgcna.RData"
        hidden="true" />

  <!-- Files discovered in the "figures" directory are gathered into a list collection. -->
  <collection name="figures" type="list" label="Figures">
    <discover_datasets pattern="__designation_and_ext__"
                       directory="figures"
                       visible="false" />
  </collection>
</outputs>
<!--
  Functional tests.
  NOTE(review): the report outputs are declared with format="pdf" but are
  compared here against .html fixtures; confirm the test-data files and the
  comparison method before relying on these tests.
-->
<tests>
  <!-- Expression matrix only -->
  <test>
    <param name="expression_data" value="LiverFemale3600.gem.csv" />
    <output name="gene_module_file" file="gene_module.csv" />
    <output name="network_edges_file" file="network_edges.txt" />
    <output name="network_construction_report" file="network_construction_report.html" />
  </test>
  <!-- Expression matrix plus trait matrix -->
  <test>
    <param name="expression_data" value="LiverFemale3600.gem.csv" />
    <param name="trait_data" value="ClinicalTraits.csv" />
    <output name="gene_module_file" file="gene_module.csv" />
    <output name="network_edges_file" file="network_edges.txt" />
    <output name="gene_association_file" file="gene_association_file.csv" />
    <output name="module_association_file" file="module_association_file.csv" />
    <output name="module_association_report" file="module_association_report.html" />
    <output name="network_construction_report" file="network_construction_report.html" />
  </test>
</tests>
<!-- User-facing help text. -->
<help><![CDATA[
This tool is a wrapper for the WGCNA R library. Please see the online
WGCNA tutorial for further details.
]]></help>
<!-- References for WGCNA and the reporting packages, as BibTeX entries. -->
<citations>
  <!-- WGCNA -->
  <citation type="bibtex">
    @article{langfelder2008wgcna,
      title={WGCNA: an R package for weighted correlation network analysis},
      author={Langfelder, Peter and Horvath, Steve},
      journal={BMC bioinformatics},
      volume={9},
      number={1},
      pages={559},
      year={2008},
      publisher={BioMed Central}
    }
  </citation>
  <!-- rmarkdown -->
  <citation type="bibtex">
    @article{allaire2016rmarkdown,
      title={rmarkdown: Dynamic Documents for R, 2016},
      author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
      journal={R package version 0.9},
      volume={6},
      year={2016}
    }
  </citation>
  <!-- knitr -->
  <citation type="bibtex">
    @book{xie2015dynamic,
      title={Dynamic Documents with R and knitr},
      author={Xie, Yihui},
      volume={29},
      year={2015},
      publisher={CRC Press}
    }
  </citation>
  <!-- DT -->
  <citation type="bibtex">
    @misc{dt2016,
      title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
      author = {Yihui Xie},
      year = {2016},
      note = {R package version 0.2},
      url = {https://CRAN.R-project.org/package=DT},
    }
  </citation>
</citations>
306 </tool>