diff gffcompare.xml @ 0:3c97c841a443 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/packages/gffcompare commit 33ef7ef2f829bf46a6fde7637715d974c17f898a
author iuc
date Fri, 07 Apr 2017 16:02:07 -0400
parents
children c80cdc2eac6d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gffcompare.xml	Fri Apr 07 16:02:07 2017 -0400
@@ -0,0 +1,164 @@
+<tool id="gffcompare" name="GffCompare" version="0.9.8">
+    <description>compare assembled transcripts to a reference annotation</description>
+    <requirements>
+        <requirement type="package" version="0.9.8">gffcompare</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <regex match="Error" />
+        <regex match="Exception" />
+    </stdio>
+    <version_command>gffcompare -v | awk '{print $2}'</version_command>
+    <command>
+        <![CDATA[
+            #set $input_gtf = "' '".join(str($inputs).split(','))
+            #if $seq_data.use_seq_data == "Yes":
+                #if $seq_data.seq_source.index_source == "history":
+                    ln -s '$seq_data.seq_source.ref_file' ref_seq.fa &&
+                #else:
+                    ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
+                #end if
+            #end if
+            gffcompare
+            ## Use annotation reference?
+            #if $annotation.use_ref_annotation == "Yes":
+                -r '$annotation.reference_annotation' $annotation.ignore_nonoverlapping_reference $annotation.ignore_nonoverlapping_transfrags
+            #end if
+
+            ## Use sequence data?
+
+            #if $seq_data.use_seq_data == "Yes":
+                -s ref_seq.fa
+            #end if
+
+            $discard_single_exon -e $max_dist_exon -d $max_dist_group $discard_intron_redundant_transfrags
+
+            '$input_gtf'
+        ]]>
+    </command>
+    <inputs>
+        <param format="gtf" name="inputs" type="data" label="GTF inputs for comparison" help="" multiple="true" />
+        <conditional name="annotation">
+            <param label="Use Reference Annotation" name="use_ref_annotation" type="select">
+                <option value="No">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="Yes">
+                <param argument="-r" format="gff3,gtf" help="Requires an annotation file in GFF3 or GTF format." label="Reference Annotation" name="reference_annotation" type="data" />
+                <param argument="-R" falsevalue="" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" label="Ignore reference transcripts that are not overlapped by any input transfrags" name="ignore_nonoverlapping_reference" truevalue="-R" type="boolean" />
+                <param argument="-Q" falsevalue="" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" label="Ignore input transcripts that are not overlapped by any reference transcripts" name="ignore_nonoverlapping_transfrags" truevalue="-Q" type="boolean" />
+            </when>
+            <when value="No">
+            </when>
+        </conditional>
+        <conditional name="seq_data">
+            <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." label="Use Sequence Data" name="use_seq_data" type="select">
+                <option value="Yes">Yes</option>
+                <option value="No">No</option>
+            </param>
+            <when value="No" />
+            <when value="Yes">
+                <conditional name="seq_source">
+                    <param label="Choose the source for the reference list" name="index_source" type="select">
+                        <option value="cached">Locally cached</option>
+                        <option value="history">History</option>
+                    </param>
+                    <when value="cached">
+                        <param argument="-s" label="Using reference genome" name="index" type="select">
+                            <options from_data_table="fasta_indexes">
+                                <filter column="1" key="dbkey" ref="inputs" type="data_meta" />
+                                <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
+                            </options>
+                        </param>
+                    </when>
+                    <when value="history">
+                        <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <param argument="-M/-N" label="discard (ignore) single-exon transcripts" name="discard_single_exon" type="select">
+            <option selected="True" value="">No</option>
+            <option value="-M">Discard single-exon transfrags and reference transcripts</option>
+            <option value="-N">Discard single-exon reference transcripts</option>
+        </param>
+        <param argument="-e" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" label="Max. Distance for assessing exon accuracy" name="max_dist_exon" type="integer" value="100" />
+        <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" />
+        <param argument="-F" help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" truevalue="-F" falsevalue="" label="discard intron-redundant transfrags sharing 5'" name="discard_intron_redundant_transfrags" type="boolean" />
+    </inputs>
+    <outputs>
+        <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" />
+        <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" />
+        <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tracking file" name="transcripts_tracking" />
+        <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined">
+            <filter>(use_seq_data == 'No')</filter>
+        </data>
+        <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated">
+            <filter>(use_seq_data == 'Yes')</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param ftype="gtf" name="inputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
+            <param name="use_ref_annotation" value="Yes" />
+            <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
+            <param name="ignore_nonoverlapping_reference" value="Yes" />
+            <param name="ignore_nonoverlapping_transfrags" value="No" />
+            <param name="use_seq_data" value="No" />
+            <param name="discard_single_exon" value="" />
+            <param name="max_dist_exon" value="100" />
+            <param name="max_dist_group" value="100" />
+            <param name="discard_intron_redundant_transfrags" value="No" />
+            <output file="gffcompare_out1.stats" name="transcripts_stats" lines_diff="6" />
+            <output file="gffcompare_out1.loci" name="transcripts_loci" />
+            <output file="gffcompare_out1.tracking" name="transcripts_tracking" />
+            <output file="gffcompare_out1.gtf" name="transcripts_combined" />
+        </test>
+        <test>
+            <param ftype="gtf" name="inputs" value="gffcompare_in4.gtf" />
+            <param name="use_ref_annotation" value="Yes" />
+            <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" />
+            <param name="ignore_nonoverlapping_reference" value="Yes" />
+            <param name="ignore_nonoverlapping_transfrags" value="No" />
+            <param name="use_seq_data" value="No" />
+            <param name="discard_single_exon" value="" />
+            <param name="max_dist_exon" value="100" />
+            <param name="max_dist_group" value="100" />
+            <param name="discard_intron_redundant_transfrags" value="No" />
+            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
+            <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" />
+            <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
+            <output file="gffcompare_out2.gtf" name="transcripts_annotated" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**GffCompare Overview**
+
+## GffCompare
+* compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, Stringtie).
+* collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulted from assembly of different samples)
+* classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in a
+annotation file (also in GTF/GFF3 format)
+
+The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
+(see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
+are supported by GffCompare, while new features will likely be added to GffCompare in the future.
+
+A notable difference from GffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
+gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
+same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
+their relationship with the matching/overlapping reference transcript),  but the original transcript IDs are preserved, so gffcompare can thus
+be used as a simple way of annotating a set of transcripts.
+
+Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
+contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
+their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
+and is simply ignored when given).
+    ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nbt.1621</citation>
+    </citations>
+</tool>