comparison deseq2.xml @ 0:d983d19fbbab draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 625a9632df44b1b623941e35c22f71f0e55ee371
author iuc
date Mon, 28 Sep 2015 05:07:29 -0400
parents
children 89bbd1dbf48d
comparison
equal deleted inserted replaced
-1:000000000000 0:d983d19fbbab
1 <tool id="deseq2" name="DESeq2" version="2.1.8.0">
2 <description>Determines differentially expressed features from count tables</description>
3 <requirements>
4 <!-- Ordering is crucial, otherwise R will override the ENV variables from deseq2. NOTE(review): only one requirement is listed here, so there is currently nothing to order; confirm whether this remark is still needed. -->
5 <requirement type="package" version="1.8.1">deseq2</requirement>
6 </requirements>
<!-- Marks the job as failed (level "fatal") when stdout or stderr contains one of the failure markers matched below: "Execution halted", "Error in" or "Fatal error". -->
7 <stdio>
8 <regex match="Execution halted"
9 source="both"
10 level="fatal"
11 description="Execution halted." />
12 <regex match="Error in"
13 source="both"
14 level="fatal"
15 description="An undefined error occurred, please check your input carefully and contact your administrator." />
16 <regex match="Fatal error"
17 source="both"
18 level="fatal"
19 description="An undefined error occurred, please check your input carefully and contact your administrator." />
20 </stdio>
<!-- Reports the tool version string: the R version banner line(s) mentioning "version" but not "GNU", then ", DESeq2 version" and the DESeq2 package version taken from sessionInfo(). R's stderr is discarded and lines containing "WARNING: " (case-insensitive) are filtered out. -->
21 <version_command>
22 <![CDATA[
23 echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
24 ]]>
25 </version_command>
<!-- Builds the Rscript call for deseq2.R. Passes the result path (-o), the PDF path when plots are requested (-p), the factor / level / count-file structure serialised as JSON (-f), the fit type (-t) and one switch per enabled boolean option (-a, -b, -c, -m).
     The nested loops collect, per factor, a list of {level name: [count file paths]} entries in the order the user entered them.
     NOTE(review): the -c switch forwards auto_mean_filter_off, which was previously declared as an input but never passed on; it assumes the bundled deseq2.R declares "c" as the short flag for that option. Confirm against the script. -->
26 <command>
27 <![CDATA[
28 #import json
29 Rscript \$DESEQ2_ROOT_PATH/DESeq2/script/deseq2.R
30 -o "$deseq_out"
31 #if $pdf:
32 -p "$plots"
33 #end if
34 #set $temp_factor_names = list()
35 #for $factor in $rep_factorName:
36 #set $temp_factor = list()
37 #for $level in $factor.rep_factorLevel:
38 #set $count_files = list()
39 #for $file in $level.countsFile:
40 $count_files.append(str($file))
41 #end for
42 $temp_factor.append( {str($level.factorLevel): $count_files} )
43 #end for
44 $temp_factor_names.append([str($factor.factorName), $temp_factor])
45 #end for
46 -f '#echo json.dumps(temp_factor_names)#'
47 -t "$fit_type"
48 #if $outlier_replace_off:
49 -a
50 #end if
51 #if $outlier_filter_off:
52 -b
53 #end if
#if $auto_mean_filter_off:
-c
#end if
54 #if $many_contrasts:
55 -m
56 #end if
57 ]]>
58 </command>
<!-- Tool form. At least one factor is required; each factor has a name and at least two levels, and each level has a name plus one or more tabular count files. Factor and level names are sanitised to letters, digits and underscores. The remaining parameters toggle the PDF output, the all-vs-all contrast mode, the fit type and DESeq2's outlier / independent-filtering behaviour. -->
59 <inputs>
60 <repeat name="rep_factorName" title="Factor" min="1">
61 <param name="factorName" type="text" value="FactorName" label="Specify a factor name"
62 help="Only letters, numbers and underscores will be retained in this field">
63 <sanitizer>
64 <valid initial="string.letters,string.digits"><add value="_" /></valid>
65 </sanitizer>
66 </param>
67 <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
68 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level"
69 help="Only letters, numbers and underscores will be retained in this field">
70 <sanitizer>
71 <valid initial="string.letters,string.digits"><add value="_" /></valid>
72 </sanitizer>
73 </param>
74 <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
75 </repeat>
76 </repeat>
77 <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
78 label="Visualising the analysis results"
79 help="Output an additional PDF file" />
80 <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"
81 label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"
82 help="When enabled, the results are returned as a collection of result files instead of a single result table" />
83 <param name="fit_type" type="select" label="Fit type">
84 <option value="1" selected="true">parametric</option>
85 <option value="2">local</option>
86 <option value="3">mean</option>
87 </param>
88 <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
89 label="Turn off outliers replacement (only affects with >6 replicates)"
90 help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace
91 counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor
92 or normalization factor for that sample" />
93 <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
94 label="Turn off outliers filtering (only affects with >2 replicates)"
95 help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically
96 filter genes which contain a Cook’s distance above a cutoff" />
97 <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
98 label="Turn off independent filtering"
99 help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
100 </inputs>
<!-- Declared outputs. The single result table is kept only when many_contrasts is off; the list collection is kept only when it is on and is filled from discovered files; the PDF is kept only when pdf is enabled.
     NOTE(review): the discover_datasets pattern "vs" contains no named group (such as designation or name); confirm that the targeted Galaxy release accepts such a pattern and that it matches the file names written by the script. -->
101 <outputs>
102 <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}">
103 <filter>many_contrasts is False</filter>
104 </data>
105 <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
106 <filter>many_contrasts is True</filter>
107 <discover_datasets pattern="vs" visible="true"/>
108 </collection>
109 <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
110 <filter>pdf == True</filter>
111 </data>
112 </outputs>
<!-- One functional test: a single factor "Treatment" with the levels "Treated" (three count files) and "Untreated" (four count files), PDF output disabled. Only deseq_out is compared, because it is the only tabular output this tool declares. -->
113 <tests>
114 <test>
115 <repeat name="rep_factorName">
116 <param name="factorName" value="Treatment"/>
117 <repeat name="rep_factorLevel">
118 <param name="factorLevel" value="Treated"/>
119 <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
120 </repeat>
121 <repeat name="rep_factorLevel">
122 <param name="factorLevel" value="Untreated"/>
123 <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
124 </repeat>
125 </repeat>
126 <param name="pdf" value="no"/>
127 <output name="deseq_out" file="deseq2_out.tab"/>
129 </test>
130 </tests>
131 <help>
132 <![CDATA[
133 .. class:: infomark
134
135 **What it does**
136
137 Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
138
139
140 **Inputs**
141
142 DESeq2_ takes count tables generated by htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. You can also input several secondary factors that might influence your experiment; the final output will then be the changes in genes due to the primary factor in the presence of the secondary factors. Each factor needs at least two levels/states. You need to select the appropriate count tables from your history for each factor level.
143
144 The following table gives some examples of factors and their levels:
145
146 ========= ============== ===============
147 Factor Factor level 1 Factor level 2
148 --------- -------------- ---------------
149 Treatment Treated Untreated
150 --------- -------------- ---------------
151 Condition Knockdown Wildtype
152 --------- -------------- ---------------
153 TimePoint Day4 Day1
154 --------- -------------- ---------------
155 SeqType SingleEnd PairedEnd
156 --------- -------------- ---------------
157 Gender Female Male
158 ========= ============== ===============
159
160 *Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2. Here the order of factor levels is important. For example, for the factor 'Treatment' given in the table above, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of genes in Treated samples.
161
162 **Output**
163
164 DESeq2_ generates a tabular file containing the following columns and, optionally, visualized results as a PDF.
165
166 ====== ==========================================================
167 Column Description
168 ------ ----------------------------------------------------------
169 1 Gene Identifiers
170 2 mean normalised counts, averaged over all samples from both conditions
171 3 the logarithm (to basis 2) of the fold change (See the note in inputs section)
172 4 standard error estimate for the log2 fold change estimate
173 5 p value for the statistical significance of this change
174 6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
175 which controls false discovery rate (FDR)
176 ====== ==========================================================
177
178
179 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
180 ]]>
181 </help>
<!-- Publication to cite for this tool, referenced by its DOI. -->
182 <citations>
183 <citation type="doi">10.1186/s13059-014-0550-8</citation>
184 </citations>
185 </tool>