comparison cuffdiff_wrapper.xml @ 0:0dabb2ed6eb1

Uploaded tool tarball.
author devteam
date Tue, 01 Oct 2013 12:54:00 -0400
parents
children 60a52f8460a1
comparison
equal deleted inserted replaced
-1:000000000000 0:0dabb2ed6eb1
1 <tool id="cuffdiff" name="Cuffdiff" version="0.0.6">
2 <!-- Wrapper supports Cuffdiff versions 2.1.0-2.1.1 -->
3 <description>find significant changes in transcript expression, splicing, and promoter use</description>
4 <requirements>
5 <requirement type="package" version="2.1.1">cufflinks</requirement>
6 </requirements>
7 <version_command>cuffdiff 2>&amp;1 | head -n 1</version_command>
8 <command>
9 cuffdiff
10 --FDR=$fdr
11 --num-threads="4"
12 --min-alignment-count=$min_alignment_count
13 --library-norm-method=$library_norm_method
14 --dispersion-method=$dispersion_method
15
16 ## Set advanced data parameters? Fragment length mean (-m) and standard deviation (-s) are passed only on request.
17 #if $additional.sAdditional == "Yes":
18 -m $additional.frag_mean_len
19 -s $additional.frag_len_std_dev
20 #end if
21
22 ## Multi-read correct?
23 #if str($multiread_correct) == "Yes":
24 -u
25 #end if
26 ## NOTE(review): the ref_file, dbkey and index_dir flags below look like arguments of an earlier wrapper script, not of cuffdiff itself; confirm against the Cuffdiff manual.
27 ## Bias correction?
28 #if $bias_correction.do_bias_correction == "Yes":
29 -b
30 #if $bias_correction.seq_source.index_source == "history":
31 --ref_file=$bias_correction.seq_source.ref_file
32 #else:
33 --ref_file="None"
34 #end if
35 --dbkey=${gtf_input.metadata.dbkey}
36 --index_dir=${GALAXY_DATA_INDEX_DIR}
37 #end if
38 ## Condition labels: comma-separated, in condition order; quoted so that a name containing spaces stays a single argument.
39 #set labels = ','.join( [ str( $condition.name ) for $condition in $conditions ] )
40 --labels "$labels"
41
42 ## Inputs: the transcript file first, then one comma-separated list of replicate files per condition.
43 $gtf_input
44 #for $condition in $conditions:
45 #set samples = ','.join( [ str( $sample.sample ) for $sample in $condition.samples ] )
46 $samples
47 #end for
48 </command>
49 <inputs>
50 <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>
51
52 <repeat name="conditions" title="Condition" min="2">
53 <param name="name" title="Condition name" type="text" label="Name"/>
54 <repeat name="samples" title="Replicate" min="1">
55 <param name="sample" label="Add replicate" type="data" format="sam,bam"/>
56 </repeat>
57 </repeat>
58
59 <param name="library_norm_method" type="select" label="Library normalization method">
60 <option value="geometric" selected="True">geometric</option>
61 <option value="classic-fpkm">classic-fpkm</option>
62 <option value="quartile">quartile</option>
63 </param>
64
65 <param name="dispersion_method" type="select" label="Dispersion estimation method" help="If using only one sample per condition, you must use 'blind.'">
66 <option value="pooled" selected="True">pooled</option>
67 <option value="per-condition">per-condition</option>
68 <option value="blind">blind</option>
69 </param>
70
71 <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/>
72
73 <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/>
74
75 <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.">
76 <option value="No" selected="true">No</option>
77 <option value="Yes">Yes</option>
78 </param>
79
80 <conditional name="bias_correction">
81 <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
82 <option value="No">No</option>
83 <option value="Yes">Yes</option>
84 </param>
85 <when value="Yes">
86 <conditional name="seq_source">
87 <param name="index_source" type="select" label="Reference sequence data">
88 <option value="cached">Locally cached</option>
89 <option value="history">History</option>
90 </param>
91 <when value="cached"></when>
92 <when value="history">
93 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
94 </when>
95 </conditional>
96 </when>
97 <when value="No"></when>
98 </conditional>
99
100 <param name="include_read_group_files" type="select" label="Include Read Group Datasets" help="Read group datasets provide information on replicates.">
101 <option value="No" selected="true">No</option>
102 <option value="Yes">Yes</option>
103 </param>
104
105 <conditional name="additional">
106 <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended for paired-end reads)">
107 <option value="No">No</option>
108 <option value="Yes">Yes</option>
109 </param>
110 <when value="No"></when>
111 <when value="Yes">
112 <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/>
113 <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/>
114 </when>
115 </conditional>
116 </inputs>
117 <!-- Catch-all rule: any text on stdout or stderr is classified at level "log" and described as "tool progress". NOTE(review): presumably this keeps Cuffdiff progress messages from being treated as job errors; confirm against the Galaxy stdio documentation. -->
118 <stdio>
119 <regex match=".*" source="both" level="log" description="tool progress"/>
120 </stdio>
121
122 <outputs>
123 <!-- Optional read group datasets: each is created only when the include_read_group_files select is set to "Yes". The filter is a Python expression evaluated against the tool parameters. -->
124 <data format="tabular" name="isoforms_read_group" label="${tool.name} on ${on_string}: isoforms read group tracking" from_work_dir="isoforms.read_group_tracking" >
125 <filter>(include_read_group_files == 'Yes')</filter>
126 </data>
127 <data format="tabular" name="genes_read_group" label="${tool.name} on ${on_string}: genes read group tracking" from_work_dir="genes.read_group_tracking" >
128 <filter>(include_read_group_files == 'Yes')</filter>
129 </data>
130 <data format="tabular" name="cds_read_group" label="${tool.name} on ${on_string}: CDs read group tracking" from_work_dir="cds.read_group_tracking" >
131 <filter>(include_read_group_files == 'Yes')</filter>
132 </data>
133 <data format="tabular" name="tss_groups_read_group" label="${tool.name} on ${on_string}: TSS groups read group tracking" from_work_dir="tss_groups.read_group_tracking" >
134 <filter>(include_read_group_files == 'Yes')</filter>
135 </data>
136
137 <!-- Standard datasets: always created; each is collected from the named file in the job working directory. -->
138 <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing" from_work_dir="splicing.diff" />
139 <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing" from_work_dir="promoters.diff" />
140 <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading differential expression testing" from_work_dir="cds.diff" />
141 <data format="tabular" name="cds_exp_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM differential expression testing" from_work_dir="cds_exp.diff" />
142 <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking" from_work_dir="cds.fpkm_tracking" />
143 <data format="tabular" name="tss_groups_exp" label="${tool.name} on ${on_string}: TSS groups differential expression testing" from_work_dir="tss_group_exp.diff" />
144 <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" from_work_dir="tss_groups.fpkm_tracking" />
145 <data format="tabular" name="genes_exp" label="${tool.name} on ${on_string}: gene differential expression testing" from_work_dir="gene_exp.diff" />
146 <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking" from_work_dir="genes.fpkm_tracking" />
147 <data format="tabular" name="isoforms_exp" label="${tool.name} on ${on_string}: transcript differential expression testing" from_work_dir="isoform_exp.diff" />
148 <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking" from_work_dir="isoforms.fpkm_tracking" />
149 </outputs>
150
151 <tests>
152 <test>
153 <!--
154 cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam
155 -->
156 <!--
157 NOTE: as of version 0.0.6 of the wrapper, tests cannot be run because multiple inputs to a repeat
158 element are not supported.
159 <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
160 <param name="do_groups" value="No" />
161 <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" />
162 <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" />
163 <param name="fdr" value="0.05" />
164 <param name="min_alignment_count" value="0" />
165 <param name="do_bias_correction" value="No" />
166 <param name="do_normalization" value="No" />
167 <param name="multiread_correct" value="No"/>
168 <param name="sAdditional" value="No"/>
169 <output name="splicing_diff" file="cuffdiff_out9.txt"/>
170 <output name="promoters_diff" file="cuffdiff_out10.txt"/>
171 <output name="cds_diff" file="cuffdiff_out11.txt"/>
172 <output name="cds_exp_fpkm_tracking" file="cuffdiff_out4.txt"/>
173 <output name="cds_fpkm_tracking" file="cuffdiff_out8.txt"/>
174 <output name="tss_groups_exp" file="cuffdiff_out3.txt" lines_diff="200"/>
175 <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/>
176 <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/>
177 <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/>
178 <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/>
179 <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/>
180 -->
181 </test>
182 </tests>
183
184 <help>
185 **Cuffdiff Overview**
186
187 Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
188
189 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/
190
191 ------
192
193 **Know what you are doing**
194
195 .. class:: warningmark
196
197 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
198
199 .. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff
200
201 ------
202
203 **Input format**
204
205 Cuffdiff takes a Cufflinks or Cuffcompare GTF (or GFF3) file as input, along with SAM or BAM files containing the fragment alignments for two or more conditions; each condition needs at least one replicate.
206
207 ------
208
209 **Outputs**
210
211 Cuffdiff produces many output files:
212
213 1. Transcript FPKM expression tracking.
214 2. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id
215 3. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id
216 4. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id
217 5. Transcript differential FPKM.
218 6. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id
219 7. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id
220 8. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id
221 9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file.
222 10. Differential promoter tests: this tab delimited file lists, for each gene, the amount of overloading detected among its primary transcripts, i.e. how much differential promoter use exists between samples. Only genes producing two or more distinct primary transcripts (i.e. multi-promoter genes) are listed here.
223 11. Differential CDS tests: this tab delimited file lists, for each gene, the amount of overloading detected among its coding sequences, i.e. how much differential CDS output exists between samples. Only genes producing two or more distinct CDS (i.e. multi-protein genes) are listed here.
224
225 -------
226
227 **Settings**
228
229 All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here.
230
231 ------
232
233 **Cuffdiff parameter list**
234
235 This is a list of implemented Cuffdiff options::
236
237 -m INT Average fragment length; default 200
238 -s INT Fragment length standard deviation; default 80
239 -c INT The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads). Note that this tool form pre-fills the value 10.
240 --FDR FLOAT The allowed false discovery rate. The default is 0.05.
241 --num-importance-samples INT Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000
242 --max-mle-iterations INT Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
243 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
244
245 </help>
246 </tool>