comparison gffcompare.xml @ 0:3c97c841a443 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/packages/gffcompare commit 33ef7ef2f829bf46a6fde7637715d974c17f898a
author iuc
date Fri, 07 Apr 2017 16:02:07 -0400
parents
children c80cdc2eac6d
comparison
equal deleted inserted replaced
-1:000000000000 0:3c97c841a443
1 <tool id="gffcompare" name="GffCompare" version="0.9.8">
2 <description>compare assembled transcripts to a reference annotation</description>
3 <requirements>
4 <requirement type="package" version="0.9.8">gffcompare</requirement>
5 </requirements>
6 <stdio>
7 <exit_code range="1:" />
8 <exit_code range=":-1" />
9 <regex match="Error" />
10 <regex match="Exception" />
11 </stdio>
12 <version_command>gffcompare -v | awk '{print $2}'</version_command>
13 <command>
14 <![CDATA[
15 #set $input_gtf = "' '".join(str($inputs).split(','))
16 #if $seq_data.use_seq_data == "Yes":
17 #if $seq_data.seq_source.index_source == "history":
18 ln -s '$seq_data.seq_source.ref_file' ref_seq.fa &&
19 #else:
20 ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
21 #end if
22 #end if
23 gffcompare
24 ## Reference annotation: add -r plus the optional -R/-Q flags only when the user chose to use one.
25 #if $annotation.use_ref_annotation == "Yes":
26 -r '$annotation.reference_annotation' $annotation.ignore_nonoverlapping_reference $annotation.ignore_nonoverlapping_transfrags
27 #end if
28
29 ## Sequence data: pass the ref_seq.fa symlink created at the top of this template via -s.
30
31 #if $seq_data.use_seq_data == "Yes":
32 -s ref_seq.fa
33 #end if
34
35 $discard_single_exon -e $max_dist_exon -d $max_dist_group $discard_intron_redundant_transfrags
36 ## input_gtf already carries "' '" between the paths, so the outer quotes below quote each input file separately.
37 '$input_gtf'
38 ]]>
39 </command>
40 <inputs>
41 <param format="gtf" name="inputs" type="data" label="GTF inputs for comparison" help="" multiple="true" />
42 <conditional name="annotation"> <!-- gates the -r, -R and -Q options emitted by the command template -->
43 <param label="Use Reference Annotation" name="use_ref_annotation" type="select">
44 <option value="No">No</option>
45 <option value="Yes">Yes</option>
46 </param>
47 <when value="Yes">
48 <param argument="-r" format="gff3,gtf" help="Requires an annotation file in GFF3 or GTF format." label="Reference Annotation" name="reference_annotation" type="data" />
49 <param argument="-R" falsevalue="" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" label="Ignore reference transcripts that are not overlapped by any input transfrags" name="ignore_nonoverlapping_reference" truevalue="-R" type="boolean" />
50 <param argument="-Q" falsevalue="" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" label="Ignore input transcripts that are not overlapped by any reference transcripts" name="ignore_nonoverlapping_transfrags" truevalue="-Q" type="boolean" />
51 </when>
52 <when value="No">
53 </when>
54 </conditional>
55 <conditional name="seq_data"> <!-- when "Yes", the command template symlinks the chosen FASTA to ref_seq.fa and passes it with -s -->
56 <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." label="Use Sequence Data" name="use_seq_data" type="select">
57 <option value="Yes">Yes</option>
58 <option value="No">No</option>
59 </param>
60 <when value="No" />
61 <when value="Yes">
62 <conditional name="seq_source"> <!-- selects between a cached fasta_indexes entry and a FASTA dataset from the history -->
63 <param label="Choose the source for the reference list" name="index_source" type="select">
64 <option value="cached">Locally cached</option>
65 <option value="history">History</option>
66 </param>
67 <when value="cached">
68 <param argument="-s" label="Using reference genome" name="index" type="select">
69 <options from_data_table="fasta_indexes">
70 <filter column="1" key="dbkey" ref="inputs" type="data_meta" /> <!-- filters the table rows on the dbkey metadata of the "inputs" datasets -->
71 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
72 </options>
73 </param>
74 </when>
75 <when value="history">
76 <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" />
77 </when>
78 </conditional>
79 </when>
80 </conditional>
81 <param argument="-M/-N" label="discard (ignore) single-exon transcripts" name="discard_single_exon" type="select">
82 <option selected="True" value="">No</option>
83 <option value="-M">Discard single-exon transfrags and reference transcripts</option>
84 <option value="-N">Discard single-exon reference transcripts</option>
85 </param>
86 <param argument="-e" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" label="Max. Distance for assessing exon accuracy" name="max_dist_exon" type="integer" value="100" />
87 <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" />
88 <param argument="-F" help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" truevalue="-F" falsevalue="" label="discard intron-redundant transfrags sharing 5'" name="discard_intron_redundant_transfrags" type="boolean" />
89 </inputs>
90 <outputs> <!-- every output is collected from the job working directory, where gffcompare writes its gffcmp.* files -->
91 <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" />
92 <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" />
93 <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tracking file" name="transcripts_tracking" />
94 <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined"> <!-- produced unless exactly one query file is compared against a reference annotation -->
95 <filter>(isinstance(inputs, list) and len(inputs) &gt; 1) or annotation['use_ref_annotation'] == 'No'</filter>
96 </data>
97 <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated"> <!-- "annotation mode": a single query file together with a reference annotation -->
98 <filter>(not isinstance(inputs, list) or len(inputs) == 1) and annotation['use_ref_annotation'] == 'Yes'</filter>
99 </data>
100 </outputs>
101 <tests>
102 <test> <!-- two query GTFs compared against a reference annotation; expects the combined GTF output -->
103 <param ftype="gtf" name="inputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
104 <param name="use_ref_annotation" value="Yes" />
105 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
106 <param name="ignore_nonoverlapping_reference" value="Yes" />
107 <param name="ignore_nonoverlapping_transfrags" value="No" />
108 <param name="use_seq_data" value="No" />
109 <param name="discard_single_exon" value="" />
110 <param name="max_dist_exon" value="100" />
111 <param name="max_dist_group" value="100" />
112 <param name="discard_intron_redundant_transfrags" value="No" />
113 <output file="gffcompare_out1.stats" name="transcripts_stats" lines_diff="6" />
114 <output file="gffcompare_out1.loci" name="transcripts_loci" />
115 <output file="gffcompare_out1.tracking" name="transcripts_tracking" />
116 <output file="gffcompare_out1.gtf" name="transcripts_combined" />
117 </test>
118 <test> <!-- a single query GTF compared against a reference annotation; expects the annotated GTF output -->
119 <param ftype="gtf" name="inputs" value="gffcompare_in4.gtf" />
120 <param name="use_ref_annotation" value="Yes" />
121 <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" />
122 <param name="ignore_nonoverlapping_reference" value="Yes" />
123 <param name="ignore_nonoverlapping_transfrags" value="No" />
124 <param name="use_seq_data" value="No" />
125 <param name="discard_single_exon" value="" />
126 <param name="max_dist_exon" value="100" />
127 <param name="max_dist_group" value="100" />
128 <param name="discard_intron_redundant_transfrags" value="No" />
129 <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
130 <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" />
131 <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
132 <output file="gffcompare_out2.gtf" name="transcripts_annotated" />
133 </test>
134 </tests>
135 <help>
136 <![CDATA[
137 **GffCompare Overview**
138
139 ## GffCompare
140 * compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, StringTie).
141 * collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulting from the assembly of different samples)
142 * classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in a
143 annotation file (also in GTF/GFF3 format)
144
145 The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
146 (see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
147 are supported by GffCompare, while new features will likely be added to GffCompare in the future.
148
149 A notable difference from CuffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
150 gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
151 same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
152 their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus
153 be used as a simple way of annotating a set of transcripts.
154
155 Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
156 contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
157 their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
158 and is simply ignored when given).
159 ]]>
160 </help>
161 <citations>
162 <citation type="doi">10.1038/nbt.1621</citation>
163 </citations>
164 </tool>