comparison masigpro.xml @ 0:c8c290f3ea7d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit 5798bd978553dee97521c39920d263dd750e0755
author iuc
date Mon, 15 May 2017 07:29:03 -0400
parents
children cc96abdef027
comparison
equal deleted inserted replaced
-1:000000000000 0:c8c290f3ea7d
1 <tool id="masigpro" name="maSigPro" version="1.49.0.0">
2 <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description>
3 <requirements> <!-- Runtime packages: maSigPro itself, r-optparse (presumably for argument parsing in masigpro.R; that script is not shown here), and sed, which the command uses to prepend the header row to the merged data file. -->
4 <requirement type="package" version="1.49.0">bioconductor-masigpro</requirement>
5 <requirement type="package" version="1.3.2">r-optparse</requirement>
6 <requirement type="package" version="4.4">sed</requirement>
7 </requirements>
8 <stdio> <!-- Any of the patterns below, found on stdout or stderr (source "both"), marks the job as failed (level "fatal"). -->
9 <regex match="Execution halted"
10 source="both"
11 level="fatal"
12 description="Execution halted." />
13 <regex match="Error in"
14 source="both"
15 level="fatal"
16 description="An undefined error occurred, please check your input carefully and contact your administrator." />
17 <regex match="Fatal error"
18 source="both"
19 level="fatal"
20 description="An undefined error occurred, please check your input carefully and contact your administrator." />
21 </stdio>
22 <!-- Prints the "version" line reported by R (the line mentioning GNU is filtered out) followed by the version of the loaded maSigPro package; stderr of the R call and lines containing "WARNING: " are discarded. --> <version_command>
23 <![CDATA[
24 echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
25 ]]>
26 </version_command>
27 <!-- Advanced mode: paste column 1 of the first count file together with column 2 of every count file into "data", prepend the quoted dataset names as a header line, optionally hard-link the generated files to the outputs, and use the design_matrix configfile as edesign. Defaults mode: pass the user-supplied edesign and data datasets straight to masigpro.R. Dataset paths are single-quoted so that paths containing spaces or shell metacharacters cannot break the command line. NOTE(review): the header is built from $file.name and interpolated into a single-quoted sed script; a dataset name containing a single quote would still break it and should be sanitised. --> <command>
28 <![CDATA[
29 #if str($source.source_selector) == "advanced":
30 paste
31 #set $start = True
32 #set $header = ''
33 #for $time in $source.rep_time:
34 #for $file in $time.files:
35 #if $start:
36 <(cut -f1 '$file')
37 #set $start = False
38 #end if
39 #set $header += ' "' + $file.name + '"'
40 <(cut -f2 '$file')
41 #end for
42 #end for
43 > data && sed -i '1i$header' data &&
44 #if $source.enable_output:
45 ln -f data '$data_out' && ln -f '$design_matrix' '$edesign_out' &&
46 #end if
47 #set $data = 'data'
48 #set $edesign = $design_matrix
49 #else:
50 #set $data = $source.data
51 #set $edesign = $source.edesign
52 #end if
53 Rscript '${__tool_directory__}/masigpro.R'
54 -e '$edesign'
55 -d '$data'
56 -o '$masigpro_out'
57 #if str($source.source_selector) == "defaults":
58 --time_col $source.time_col
59 --repl_col $source.repl_col
60 #end if
61 --degree $makeDesignMatrix.degree
62 --qvalue $p_vector.qvalue
63 --min_obs $p_vector.min_obs
64 --step_method '$Tfit.step_method'
65 --nvar_correction $Tfit.nvar_correction
66 --alfa $Tfit.alfa
67 --rsq $getSiggenes.rsq
68 --vars '$getSiggenes.vars'
69 --significant_intercept '$getSiggenes.significant_intercept'
70 #if $pdf.pdf_selector:
71 --cluster_data $pdf.seeGenes.clusterData
72 -k $pdf.seeGenes.k
73 --cluster_method $pdf.seeGenes.clustering.clusterMethod
74 #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust":
75 --distance $pdf.seeGenes.clustering.distance
76 --agglo_method $pdf.seeGenes.clustering.aggloMethod
77 #end if
78 #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans":
79 --iter_max $pdf.seeGenes.clustering.iterMax
80 #end if
81 --color_mode $pdf.seeGenes.colorMode
82 --show_fit $pdf.seeGenes.showFit
83 --show_lines $pdf.seeGenes.showLines
84 --cexlab $pdf.seeGenes.cexlab
85 --legend $pdf.seeGenes.legend
86 #end if
87 ]]>
88 </command>
89 <configfiles> <!-- design_matrix: the generated edesign table, only filled in "advanced" mode. The header is "Name Time Replicate" plus one column per experimental group; each count file yields one row with its quoted name, its time value, a replicate number (position of the matching rep_repl entry, otherwise a fresh number continuing after the declared replicate sets) and a 1/0 flag per group. -->
90 <configfile name="design_matrix">#if str($source.source_selector) == "advanced":
91 #set $header = "Name Time Replicate"
92 #for $group in $source.rep_groups:
93 #set $header = $header + ' ' + str($group.name)
94 #end for
95 $header
96 #set $c = len($source.rep_repl) + 1
97 #for $time in $source.rep_time:
98 #for $file in $time.files:
99 #set $is_repl = False
100 #for $i, $repl in enumerate($source.rep_repl):
101 #if str($file) in str($repl.files):
102 #set $r = $i + 1
103 #set $is_repl = True
104 #end if
105 #end for
106 #if $is_repl == False:
107 #set $r = $c
108 #set $c += 1
109 #end if
110 #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r)
111 #for $group in $source.rep_groups:
112 #if str($file) in str($group.files):
113 #set $line += " 1"
114 #else
115 #set $line += " 0"
116 #end if
117 #end for
118 $line
119 #end for
120 #end for
121 #end if
122 </configfile>
123 </configfiles>
124 <inputs>
125 <conditional name="source">
126 <param label="Choose data source" name="source_selector"
127 help="Choose if you want to provide separate count files (e.g. from HTSeq-count or featureCounts)
128 and define your experiment design matrix here, or if you have maSigPro edesign and data input files already."
129 type="select">
130 <option value="defaults">Use maSigPro edesign and data files</option>
131 <option value="advanced">Separate count data (e.g. from HTSeq-count or featureCounts)</option>
132 </param>
133 <when value="defaults">
134 <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix"
135 help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" />
136 <param name="data" format="tabular,txt" type="data" label="Gene expression matrix"
137 help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" />
138 <param name="time_col" label="Column number containing time values" type="integer" value="1"
139 help="Column number in edesign containing time values. Default is first column" />
140 <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2"
141 help="Column number in edesign containing coding for replicate arrays. Default is second column" />
142 </when>
143 <when value="advanced">
144 <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?"
145 help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." />
146 <repeat name="rep_time" title="Time values" min="1" default="1">
147 <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed">
148 <sanitizer>
149 <valid initial="string.digits"></valid>
150 </sanitizer>
151 </param>
152 <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" />
153 </repeat>
154 <repeat name="rep_groups" title="Experimental groups" min="1" default="1">
155 <param name="name" type="text" value="Group title" label="Specify the name of this experimental group"
156 help="Use a single name without spaces or special characters">
157 </param>
158 <param name="files" type="data" format="tabular" multiple="true"
159 label="Counts file(s) belonging to this experimental group" />
160 </repeat>
161 <repeat name="rep_repl" title="Replicates" min="0" default="0">
162 <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" />
163 </repeat>
164 </when>
165 </conditional>
166 <section name="makeDesignMatrix"
167 title="Step 1: make.Design.Matrix - Defining the regression model"
168 help="‘make.design.matrix’ creates the design matrix of dummies for
169 fitting time series microarray gene expression experiments.">
170 <param name="degree" type="integer" value="1"
171 label="Degree of regression fit polynomial"
172 help="The degree of the regression fit polynomial. ‘degree’ = 1 returns
173 linear regression, ‘degree’ = 2 returns quadratic regression, etc" />
174 </section>
175 <section name="p_vector"
176 title="Step 2: p.vector - Finding significant genes"
177 help="‘p.vector’ performs a regression fit for each gene taking all
178 variables present in the model given by a regression matrix and
179 returns a list of FDR corrected significant genes">
180 <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" />
181 <param name="min_obs" label="Minimum values" type="integer" value="6"
182 help="Genes with less than this number of true numerical values
183 will be excluded from the analysis. Minimum value to estimate
184 the model is (degree+1)xGroups+1. Default is 6." />
185 </section>
186 <section name="Tfit" title="Step 3: T.fit - Finding significant differences"
187 help="‘T.fit’ selects the best regression model for each gene using
188 stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent
189 steps, meaning that significant genes are first selected on the
190 basis of a general model and then the significant variables for
191 each gene are found by step-wise regression.">
192 <param name="step_method" type="select" label="Step regression"
193 help="The step regression can be ‘backward’ or ‘forward’ indicating
194 whether the step procedure starts from the model with all or none
195 variables. With the ‘two.ways.backward’ or ‘two.ways.forward’
196 options the variables are both allowed to get in and out. At each
197 step the p-value of each variable is computed and variables get
198 in/out the model when this p-value is lower or higher than given
199 threshold alfa.">
200 <option selected="True" value="backward">backward</option>
201 <option value="forward">forward</option>
202 <option value="two.ways.backward">two.ways.backward</option>
203 <option value="two.ways.forward">two.ways.forward</option>
204 </param>
205 <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false"
206 help="When nvar.correction is TRUE the given significance
207 level is corrected by the number of variables in the model.">
208 <!-- Boolean params take no <option> children; the emitted values come from
209 truevalue/falsevalue and the default (FALSE) from checked="false" above. -->
210 </param>
211 <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" />
212 </section>
213 <section name="getSiggenes"
214 title="Step 4: get.siggenes - Obtaining lists of significant genes"
215 help="This function creates lists of significant genes for a set of
216 variables whose significance value has been computed with the
217 ‘T.fit’ function.">
218 <param name="rsq" type="float" value="0.7" label="rsq"
219 help="cut-off level at the R-squared value for the stepwise
220 regression fit. Only genes with R-squared more than rsq are
221 selected" />
222 <param name="vars" type="select" label="Variables"
223 help="Variables for which to extract significant genes.
224 ‘all’: generates one single matrix or gene list with all
225 significant genes.
226
227 ‘each’: generates as many significant genes extractions as
228 variables in the general regression model. Each extraction
229 contains the significant genes for that variable.
230
231 ‘groups’: generates a significant genes extraction for each
232 experimental group.
233
234 The difference between ‘each’ and ‘groups’ is that in the
235 first case the variables of the same group (e.g. ‘TreatmentA’
236 and ‘time*TreatmentA’) will be extracted separately and in
237 the second case
238 jointly.">
239 <option selected="True" value="groups">Groups</option>
240 <option value="each">Each</option>
241 <option value="all">All</option>
242 </param>
243 <param name="significant_intercept" type="select" label="Significant intercept"
244 help="The argument ‘significant.intercept’ modulates the treatment for
245 intercept coefficients to apply for selecting significant genes
246 when ‘vars’ equals ‘groups’. There are three possible values:
247 ‘none’, no significant intercept (differences) are considered
248 for significant gene selection, ‘dummy’, includes genes with
249 significant intercept differences between control and experimental
250 groups, and ‘all’ when both significant intercept coefficient
251 for the control group and significant intercept differences are
252 considered for selecting significant genes.">
253 <option selected="True" value="dummy">Dummy</option>
254 <option value="none">None</option>
255 <option value="all">All</option>
256 </param>
257 </section>
258 <conditional name="pdf">
259 <param label="Generate visualization PDF" name="pdf_selector" type="boolean"
260 truevalue="1" falsevalue="0" checked="true"
261 help="Choose if you want to generate a PDF file containing the visualizations" />
262 <when value="1">
263 <section name="seeGenes" title="Step 5: see.genes - Visualization"
264 help="This function provides visualisation tools for gene expression
265 values in a time course experiment. The function first calls the
266 heatmap function for a general overview of experiment results.
267 Next a partitioning of the data is generated using a clustering
268 method. The results of the clustering are visualized both as gene
269 expression profiles extended along all arrays in the experiment,
270 as provided by the plot.profiles function, and as summary
271 expression profiles for comparison among experimental groups.">
272 <param name="clusterData" label="Cluster Data" type="integer" value="1"
273 help="Data clustering can be done on the basis of either the original
274 expression values, the regression coefficients, or the t.scores.
275 In case ‘data’ is a ‘get.siggenes’ object, this is given by
276 providing the element names of the list
277 ‘c(sig.profiles,coefficients,t.score)’ or their list
278 position (1,2 or 3)." />
279 <param name="k" type="integer" label="Number of clusters for data partitioning" value="9" />
280 <conditional name="clustering">
281 <param name="clusterMethod" label="Cluster Method" type="select"
282 help="clustering method for data partitioning. Currently
283 ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported">
284 <option selected="True" value="hclust">hclust</option>
285 <option value="kmeans">kmeans</option>
286 <option value="Mclust">Mclust</option>
287 </param>
288 <when value="hclust">
289 <param name="distance" type="select" label="Distance measure"
290 help="Distance measurement function when ‘cluster.method’ is
291 ‘hclust’. Default uses correlation.">
292 <option selected="True" value="cor">Correlation</option>
293 <option value="euclidean">Euclidean</option>
294 <option value="maximum">Maximum</option>
295 <option value="manhattan">Manhattan</option>
296 <option value="Canberra">Canberra</option>
297 <option value="binary">Binary</option>
298 <option value="minkowski">Minkowski</option>
299 </param>
300 <param name="aggloMethod" type="select" label="Agglomeration method"
301 help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’.">
302 <option selected="True" value="ward.D">ward.D</option>
303 <option value="ward.D2">ward.D2</option>
304 <option value="single">single</option>
305 <option value="complete">complete</option>
306 <option value="average">average (= UPGMA)</option>
307 <option value="mcquitty">mcquitty (= WPGMA)</option>
308 <option value="median">median (= WPGMC)</option>
309 <option value="centroid">centroid (= UPGMC)</option>
310 </param>
311 </when>
312 <when value="kmeans">
313 <param name="iterMax" type="integer" label="Maximum number of iterations" value="500"
314 help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" />
315 </when>
316 </conditional>
317 <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’">
318 <option selected="True" value="rainbow">Rainbow</option>
319 <option value="gray">Gray</option>
320 </param>
321 <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?"
322 help="Indicating whether regression fit curves must be plotted" />
323 <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?"
324 help="Indicating whether a line must be drawn joining plotted data points for each group" />
325 <param name="cexlab" type="float" value="0.8" label="Magnification for x labels"
326 help="Graphical parameter magnification to be used for x labels in plotting functions" />
327 <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?"
328 help="Indicating whether legend must be added when plotting profiles" />
329 </section>
330 </when>
331 </conditional>
332 </inputs>
333 <outputs> <!-- masigpro_out is always produced. edesign_out and data_out are kept only in "advanced" mode with enable_output checked; pdf_out (collected from Results.pdf in the working directory) only when pdf_selector is checked. -->
334 <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}">
335 </data>
336 <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}">
337 <filter>
338 ((
339 source['source_selector'] == 'advanced' and
340 source['enable_output'] == True
341 ))
342 </filter>
343 </data>
344 <data format="txt" name="data_out" label="maSigPro data file on ${on_string}">
345 <filter>
346 ((
347 source['source_selector'] == 'advanced' and
348 source['enable_output'] == True
349 ))
350 </filter>
351 </data>
352 <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}">
353 <filter>
354 ((
355 pdf['pdf_selector'] == True
356 ))
357 </filter>
358 </data>
359 </outputs>
360 <tests>
361 <test>
362 <param name="source_selector" value="advanced" />
363 <param name="enable_output" value="1" />
364 <repeat name="rep_time">
365 <param name="time" value="1" />
366 <param name="files" value="control_1H.counts,treat_1H.counts" />
367 </repeat>
368 <repeat name="rep_time">
369 <param name="time" value="2" />
370 <param name="files" value="control_2H.counts,treat_2H.counts" />
371 </repeat>
372 <repeat name="rep_time">
373 <param name="time" value="3" />
374 <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" />
375 </repeat>
376 <!-- Replicates are declared solely through the rep_repl repeat below; the tool defines no "replicates_selector" input, so that stale test param was dropped. -->
377 <repeat name="rep_repl">
378 <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" />
379 </repeat>
380 <repeat name="rep_groups">
381 <param name="name" value="Control" />
382 <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" />
383 </repeat>
384 <repeat name="rep_groups">
385 <param name="name" value="Treatment" />
386 <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" />
387 </repeat>
388 <output name="masigpro_out" file="masigpro_out.tab" />
389 <output name="data_out" file="data_out.txt" />
390 <output name="edesign_out" file="edesign_out.txt" />
391 <output name="pdf_out" file="Results.pdf" />
392 </test>
393 <test>
394 <param name="source_selector" value="defaults" />
395 <param name="edesign" value="edesign_out.txt" />
396 <param name="data" value="data_out.txt" />
397 <output name="masigpro_out" file="masigpro_out.tab" />
398 <output name="pdf_out" file="Results.pdf" />
399 </test>
400 </tests>
401 <help>
402 <![CDATA[
403 .. class:: infomark
404
405 **What it does**
406
407 maSigPro_ is a regression-based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments.
408
409 **Inputs**
410
411 The maSigPro wrapper has two options for input data:
412
413 - directly through two separate text files containing the experiment design (edesign) and the data or
414 - count tables generated from HTSeq-count. Count tables must be generated for each sample individually.
415
416 To set up an experimental design from separate count files you first have to select which files belong to a certain time point.
417 Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files.
418 For a more comfortable setup in future analysis you have the option to output the generated edesign and data files.
419
420 **Output**
421
422 maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis.
423
424 .. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html
425 ]]>
426 </help>
427 <citations>
428 <citation type="doi">10.1093/bioinformatics/btl056</citation>
429 </citations>
430 </tool>