comparison cuffcompare_wrapper.xml @ 6:8e534225baa9 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:55:55 -0500
parents 8b22e9adae34
children b77178f66fc3
comparison
equal deleted inserted replaced
5:67695d7ff787 6:8e534225baa9
1 <tool id="cuffcompare" name="Cuffcompare" version="0.0.6"> 1 <tool id="cuffcompare" name="Cuffcompare" version="2.2.1.0">
2 <!-- Wrapper supports Cuffcompare versions v1.3.0 and newer -->
3 <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description> 2 <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
4 <requirements> 3 <expand macro="requirements" />
5 <requirement type="package" version="2.1.1">cufflinks</requirement> 4 <expand macro="stdio" />
6 </requirements> 5 <macros>
6 <import>cuff_macros.xml</import>
7 </macros>
7 <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command> 8 <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command>
8 <command interpreter="python"> 9 <command interpreter="python">
9 cuffcompare_wrapper.py 10 cuffcompare_wrapper.py
10
11 ## Use annotation reference? 11 ## Use annotation reference?
12 #if $annotation.use_ref_annotation == "Yes": 12 #if $annotation.use_ref_annotation == "Yes":
13 -r $annotation.reference_annotation 13 -r $annotation.reference_annotation
14 #if $annotation.ignore_nonoverlapping_reference: 14 #if $annotation.ignore_nonoverlapping_reference:
15 -R 15 -R
16 #end if 16 #end if
17 #if $annotation.ignore_nonoverlapping_transfrags:
18 -Q
19 #end if
20
17 #end if 21 #end if
18 22
19 ## Use sequence data? 23 ## Use sequence data?
20 #if $seq_data.use_seq_data == "Yes": 24 #if $seq_data.use_seq_data == "Yes":
21 -s 25 -s
24 #else: 28 #else:
25 --index=${seq_data.seq_source.index.fields.path} 29 --index=${seq_data.seq_source.index.fields.path}
26 #end if 30 #end if
27 #end if 31 #end if
28 32
33 $discard_single_exon
34
35 -e $max_dist_exon
36 -d $max_dist_group
37
38 #if $discard_intron_redundant_transfrags:
39 -F
40 #end if
41
29 ## Outputs. 42 ## Outputs.
30 --combined-transcripts=${transcripts_combined} 43 --combined-transcripts=${transcripts_combined}
31 44
32 ## Inputs. 45 @CUFFLINKS_GTF_INPUTS@
33 ${first_input}
34 #for $input_file in $input_files:
35 ${input_file.additional_input}
36 #end for
37
38 </command> 46 </command>
39 <inputs> 47 <inputs>
40 <param format="gtf" name="first_input" type="data" label="GTF file produced by Cufflinks" help=""/> 48 <expand macro="cufflinks_gtf_inputs" />
41 <repeat name="input_files" title="Additional GTF Input Files">
42 <param format="gtf" name="additional_input" type="data" label="GTF file produced by Cufflinks" help=""/>
43 </repeat>
44 <conditional name="annotation"> 49 <conditional name="annotation">
45 <param name="use_ref_annotation" type="select" label="Use Reference Annotation"> 50 <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
46 <option value="No">No</option> 51 <option value="No">No</option>
47 <option value="Yes">Yes</option> 52 <option value="Yes">Yes</option>
48 </param> 53 </param>
49 <when value="Yes"> 54 <when value="Yes">
50 <param format="gff3,gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format."/> 55 <param format="gff3,gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format."/>
51 <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any transcript in input files"/> 56 <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any input transfrags" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
57 <param name="ignore_nonoverlapping_transfrags" type="boolean" label="Ignore input transcripts that are not overlapped by any reference transcripts" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
52 </when> 58 </when>
53 <when value="No"> 59 <when value="No">
54 </when> 60 </when>
55 </conditional> 61 </conditional>
56 <conditional name="seq_data"> 62 <conditional name="seq_data">
57 <param name="use_seq_data" type="select" label="Use Sequence Data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff."> 63 <param name="use_seq_data" type="select" label="Use Sequence Data"
64 help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
58 <option value="Yes">Yes</option> 65 <option value="Yes">Yes</option>
59 <option value="No">No</option> 66 <option value="No">No</option>
60 </param> 67 </param>
61 <when value="No"></when> 68 <when value="No"></when>
62 <when value="Yes"> 69 <when value="Yes">
66 <option value="history">History</option> 73 <option value="history">History</option>
67 </param> 74 </param>
68 <when value="cached"> 75 <when value="cached">
69 <param name="index" type="select" label="Using reference genome"> 76 <param name="index" type="select" label="Using reference genome">
70 <options from_data_table="fasta_indexes"> 77 <options from_data_table="fasta_indexes">
71 <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> 78 <filter type="data_meta" ref="inputs" key="dbkey" column="1" />
72 <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> 79 <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
73 </options> 80 </options>
74 </param> 81 </param>
75 </when> 82 </when>
76 <when value="history"> 83 <when value="history">
77 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> 84 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
78 </when> 85 </when>
79 </conditional> 86 </conditional>
80 </when> 87 </when>
81 </conditional> 88 </conditional>
89 <param type="select" name="discard_single_exon" label="discard (ignore) single-exon transcripts">
90 <option value="" selected="True">No</option>
91 <option value="-M">Discard single-exon transfrags and reference transcripts</option>
92 <option value="-N">Discard single-exon reference transcripts</option>
93 </param>
94 <param type="integer" name="max_dist_exon" value="100" label="Max. Distance for assessing exon accuracy"
95 help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
96 <param type="integer" name="max_dist_group" value="100" label="Max.Distance for transcript grouping"
97 help="max. distance (range) for grouping transcript start sites. Default: 100" />
98 <param type="boolean" name="discard_intron_redundant_transfrags" label="discard intron-redundant transfrags sharing 5'"
99 help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" />
82 </inputs> 100 </inputs>
83 101
84 <outputs> 102 <outputs>
85 <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy" 103 <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy"
86 from_work_dir="cc_output.stats" /> 104 from_work_dir="cc_output.stats" />
87 <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${first_input.hid} tmap file" 105 <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tmap file"
88 from_work_dir="cc_output.input1.tmap" /> 106 from_work_dir="cc_output.input1.tmap" />
89 <data format="tabular" name="input1_refmap" 107 <data format="tabular" name="input1_refmap"
90 label="${tool.name} on ${on_string}: data ${first_input.hid} refmap file" 108 label="${tool.name} on ${on_string}: data ${inputs[0].hid} refmap file"
91 from_work_dir="cc_output.input1.refmap"> 109 from_work_dir="cc_output.input1.refmap">
92 <filter>annotation['use_ref_annotation'] == 'Yes'</filter> 110 <filter>annotation['use_ref_annotation'] == 'Yes'</filter>
93 </data> 111 </data>
94 <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} tmap file" from_work_dir="cc_output.input2.tmap"> 112 <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${inputs[1].hid} tmap file" from_work_dir="cc_output.input2.tmap">
95 <filter>len( input_files ) >= 1</filter> 113 <filter>@HAS_MULTIPLE_INPUTS@</filter>
96 </data> 114 </data>
97 <data format="tabular" name="input2_refmap" 115 <data format="tabular" name="input2_refmap"
98 label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} refmap file" 116 label="${tool.name} on ${on_string}: data ${inputs[1].hid} refmap file"
99 from_work_dir="cc_output.input2.refmap"> 117 from_work_dir="cc_output.input2.refmap">
100 <filter>annotation['use_ref_annotation'] == 'Yes' and len( input_files ) >= 1</filter> 118 <filter>annotation['use_ref_annotation'] == 'Yes' and @HAS_MULTIPLE_INPUTS@</filter>
101 </data> 119 </data>
102 <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking"> 120 <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking">
103 <filter>len( input_files ) > 0</filter> 121 <filter>@HAS_MULTIPLE_INPUTS@</filter>
104 </data> 122 </data>
105 <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/> 123 <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/>
106 </outputs> 124 </outputs>
107 125
108 <tests> 126 <tests>
109 <!-- 127 <!--
110 cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf 128 cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf
111 --> 129 -->
112 <test> 130 <test>
113 <param name="first_input" value="cuffcompare_in1.gtf" ftype="gtf"/> 131 <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/>
114 <param name="additional_input" value="cuffcompare_in2.gtf" ftype="gtf"/>
115 <param name="use_ref_annotation" value="Yes"/> 132 <param name="use_ref_annotation" value="Yes"/>
116 <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/> 133 <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/>
117 <param name="ignore_nonoverlapping_reference" value="Yes"/> 134 <param name="ignore_nonoverlapping_reference" value="Yes"/>
135 <param name="ignore_nonoverlapping_transfrags" value="No"/>
118 <param name="use_seq_data" value="No"/> 136 <param name="use_seq_data" value="No"/>
137 <param name="discard_single_exon" value="" />
138 <param name="max_dist_exon" value="100" />
139 <param name="max_dist_group" value="100" />
140 <param name="discard_intron_redundant_transfrags" value="No" />
119 <!-- Line diffs are the result of different locations for input files; this cannot be fixed as cuffcompare outputs 141 <!-- Line diffs are the result of different locations for input files; this cannot be fixed as cuffcompare outputs
120 full input path for each input. --> 142 full input path for each input. -->
121 <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="16"/> 143 <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="2"/>
122 <output name="input1_tmap" file="cuffcompare_out1.tmap"/> 144 <output name="input1_tmap" file="cuffcompare_out1.tmap"/>
123 <output name="input1_refmap" file="cuffcompare_out2.refmap"/> 145 <output name="input1_refmap" file="cuffcompare_out2.refmap"/>
124 <output name="input2_tmap" file="cuffcompare_out3.tmap"/> 146 <output name="input2_tmap" file="cuffcompare_out3.tmap"/>
125 <output name="input2_refmap" file="cuffcompare_out4.refmap"/> 147 <output name="input2_refmap" file="cuffcompare_out4.refmap"/>
126 <output name="transcripts_tracking" file="cuffcompare_out6.tracking"/> 148 <output name="transcripts_tracking" file="cuffcompare_out6.tracking"/>
131 <help> 153 <help>
132 **Cuffcompare Overview** 154 **Cuffcompare Overview**
133 155
134 Cuffcompare is part of Cufflinks_. Cuffcompare helps you: (a) compare your assembled transcripts to a reference annotation and (b) track Cufflinks transcripts across multiple experiments (e.g. across a time course). Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 156 Cuffcompare is part of Cufflinks_. Cuffcompare helps you: (a) compare your assembled transcripts to a reference annotation and (b) track Cufflinks transcripts across multiple experiments (e.g. across a time course). Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
135 157
136 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/ 158 .. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
137 159
138 ------ 160 ------
139 161
140 **Know what you are doing** 162 **Know what you are doing**
141 163
142 .. class:: warningmark 164 .. class:: warningmark
143 165
144 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. 166 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
145 167
146 .. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffcompare 168 .. __: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/
147 169
148 ------ 170 ------
149 171
150 **Input format** 172 **Input format**
151 173
172 This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing. 194 This file matches transcripts up between samples. Each row contains a transcript structure that is present in one or more input GTF files. Because the transcripts will generally have different IDs (unless you assembled your RNA-Seq reads against a reference transcriptome), cuffcompare examines the structure of each of the transcripts, matching transcripts that agree on the coordinates and order of all of their introns, as well as strand. Matching transcripts are allowed to differ on the length of the first and last exons, since these lengths will naturally vary from sample to sample due to the random nature of sequencing.
173 If you ran cuffcompare with the -r option, the first and second columns contain the closest matching reference transcript to the one described by each row. 195 If you ran cuffcompare with the -r option, the first and second columns contain the closest matching reference transcript to the one described by each row.
174 196
175 Here's an example of a line from the tracking file:: 197 Here's an example of a line from the tracking file::
176 198
177 TCONS_00000045 XLOC_000023 Tcea|uc007afj.1 j \ 199 TCONS_00000045 XLOC_000023 Tcea|uc007afj.1 j \
178 q1:exp.115|exp.115.0|100|3.061355|0.350242|0.350207 \ 200 q1:exp.115|exp.115.0|100|3.061355|0.350242|0.350207 \
179 q2:60hr.292|60hr.292.0|100|4.094084|0.000000|0.000000 201 q2:60hr.292|60hr.292.0|100|4.094084|0.000000|0.000000
180 202
181 In this example, a transcript present in the two input files, called exp.115.0 in the first and 60hr.292.0 in the second, doesn't match any reference transcript exactly, but shares exons with uc007afj.1, an isoform of the gene Tcea, as indicated by the class code j. The first three columns are as follows:: 203 In this example, a transcript present in the two input files, called exp.115.0 in the first and 60hr.292.0 in the second, doesn't match any reference transcript exactly, but shares exons with uc007afj.1, an isoform of the gene Tcea, as indicated by the class code j. The first three columns are as follows::
182 204
195 217
196 Class Codes 218 Class Codes
197 219
198 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column:: 220 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column::
199 221
200 Priority Code Description 222 Priority Code Description
201 --------------------------------- 223 ---------------------------------
202 1 = Match 224 1 = Match
203 2 c Contained 225 2 c Contained
204 3 j New isoform 226 3 j New isoform
205 4 e A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment. 227 4 e A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.
206 5 i A single exon transcript falling entirely within a reference intron 228 5 i A single exon transcript falling entirely within a reference intron
207 6 r Repeat. Currently determined by looking at the reference sequence and applied to transcripts where at least 50% of the bases are lower case 229 6 r Repeat. Currently determined by looking at the reference sequence and applied to transcripts where at least 50% of the bases are lower case
208 7 p Possible polymerase run-on fragment 230 7 p Possible polymerase run-on fragment
209 8 u Unknown, intergenic transcript 231 8 u Unknown, intergenic transcript
210 9 o Unknown, generic overlap with reference 232 9 o Unknown, generic overlap with reference
211 10 . (.tracking file only, indicates multiple classifications) 233 10 . (.tracking file only, indicates multiple classifications)
212 234
213 ------- 235 -------
214 236
215 **Settings** 237 **Settings**
216 238
223 This is a list of implemented Cuffcompare options:: 245 This is a list of implemented Cuffcompare options::
224 246
225 -r An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below. 247 -r An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.
226 -R If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts_accuracy file 248 -R If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts_accuracy file
227 </help> 249 </help>
250 <citations>
251 <citation type="doi">10.1038/nbt.1621</citation>
252 </citations>
228 </tool> 253 </tool>