Mercurial > repos > iuc > gffcompare
diff gffcompare.xml @ 0:3c97c841a443 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/packages/gffcompare commit 33ef7ef2f829bf46a6fde7637715d974c17f898a
author | iuc |
---|---|
date | Fri, 07 Apr 2017 16:02:07 -0400 |
parents | |
children | c80cdc2eac6d |
line wrap: on
line diff
<tool id="gffcompare" name="GffCompare" version="0.9.8">
    <!--
        Galaxy wrapper for gffcompare: compares one or more GTF files against
        each other and, optionally, against a reference annotation and a
        reference sequence.
    -->
    <description>compare assembled transcripts to a reference annotation</description>
    <requirements>
        <requirement type="package" version="0.9.8">gffcompare</requirement>
    </requirements>
    <!-- Any non-zero exit code, or "Error"/"Exception" in the tool output, marks the job as failed. -->
    <stdio>
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <regex match="Error" />
        <regex match="Exception" />
    </stdio>
    <version_command>gffcompare -v | awk '{print $2}'</version_command>
    <command>
        <![CDATA[
        ## Stage the reference sequence (history dataset or cached index) under a fixed name.
        #if $seq_data.use_seq_data == "Yes":
            #if $seq_data.seq_source.index_source == "history":
                ln -s '$seq_data.seq_source.ref_file' ref_seq.fa &&
            #else:
                ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
            #end if
        #end if
        gffcompare
        ## Use annotation reference?
        #if $annotation.use_ref_annotation == "Yes":
            -r '$annotation.reference_annotation' $annotation.ignore_nonoverlapping_reference $annotation.ignore_nonoverlapping_transfrags
        #end if

        ## Use sequence data?
        #if $seq_data.use_seq_data == "Yes":
            -s ref_seq.fa
        #end if

        $discard_single_exon -e $max_dist_exon -d $max_dist_group $discard_intron_redundant_transfrags

        ## Pass every selected GTF as its own single-quoted argument.
        #for $gtf in $inputs:
            '$gtf'
        #end for
        ]]>
    </command>
    <inputs>
        <param format="gtf" name="inputs" type="data" label="GTF inputs for comparison" help="" multiple="true" />
        <!-- Optional reference annotation (-r) together with the -R / -Q corrections that only apply when it is given. -->
        <conditional name="annotation">
            <param label="Use Reference Annotation" name="use_ref_annotation" type="select">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="Yes">
                <param argument="-r"
                       format="gff3,gtf"
                       help="Requires an annotation file in GFF3 or GTF format."
                       label="Reference Annotation"
                       name="reference_annotation"
                       type="data" />
                <param argument="-R"
                       falsevalue=""
                       help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)"
                       label="Ignore reference transcripts that are not overlapped by any input transfrags"
                       name="ignore_nonoverlapping_reference"
                       truevalue="-R"
                       type="boolean" />
                <param argument="-Q"
                       falsevalue=""
                       help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!"
                       label="Ignore input transcripts that are not overlapped by any reference transcripts"
                       name="ignore_nonoverlapping_transfrags"
                       truevalue="-Q"
                       type="boolean" />
            </when>
            <when value="No">
            </when>
        </conditional>
        <!-- Optional reference sequence (-s), taken either from a cached fasta index or from the history. -->
        <conditional name="seq_data">
            <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff."
                   label="Use Sequence Data"
                   name="use_seq_data"
                   type="select">
                <option value="Yes">Yes</option>
                <option value="No">No</option>
            </param>
            <when value="No" />
            <when value="Yes">
                <conditional name="seq_source">
                    <param label="Choose the source for the reference list" name="index_source" type="select">
                        <option value="cached">Locally cached</option>
                        <option value="history">History</option>
                    </param>
                    <when value="cached">
                        <param argument="-s" label="Using reference genome" name="index" type="select">
                            <options from_data_table="fasta_indexes">
                                <filter column="1" key="dbkey" ref="inputs" type="data_meta" />
                                <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
                            </options>
                        </param>
                    </when>
                    <when value="history">
                        <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- The selected option value is passed to the command line verbatim (empty, -M or -N). -->
        <param argument="-M/-N" label="discard (ignore) single-exon transcripts" name="discard_single_exon" type="select">
            <option selected="True" value="">No</option>
            <option value="-M">Discard single-exon transfrags and reference transcripts</option>
            <option value="-N">Discard single-exon reference transcripts</option>
        </param>
        <param argument="-e"
               help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100"
               label="Max. Distance for assessing exon accuracy"
               name="max_dist_exon"
               type="integer"
               value="100" />
        <param argument="-d"
               help="max. distance (range) for grouping transcript start sites. Default: 100"
               label="Max distance for transcript grouping"
               name="max_dist_group"
               type="integer"
               value="100" />
        <param argument="-F"
               help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)"
               truevalue="-F"
               falsevalue=""
               label="discard intron-redundant transfrags sharing 5'"
               name="discard_intron_redundant_transfrags"
               type="boolean" />
    </inputs>
    <outputs>
        <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" />
        <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" />
        <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tracking file" name="transcripts_tracking" />
        <!--
            Which GTF is produced depends on the run mode, not on the sequence
            data option: a single query file combined with a reference
            annotation yields the annotated GTF (see the help text below);
            every other combination is expected to yield the combined GTF.
            The two filters are exact complements, so exactly one of these
            outputs is created per job.
        -->
        <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined">
            <filter>(isinstance(inputs, list) and len(inputs) > 1) or annotation['use_ref_annotation'] == 'No'</filter>
        </data>
        <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated">
            <filter>not (isinstance(inputs, list) and len(inputs) > 1) and annotation['use_ref_annotation'] == 'Yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Two query files plus a reference annotation: expects the combined GTF. -->
        <test>
            <param ftype="gtf" name="inputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <param name="use_ref_annotation" value="Yes" />
            <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
            <param name="ignore_nonoverlapping_reference" value="Yes" />
            <param name="ignore_nonoverlapping_transfrags" value="No" />
            <param name="use_seq_data" value="No" />
            <param name="discard_single_exon" value="" />
            <param name="max_dist_exon" value="100" />
            <param name="max_dist_group" value="100" />
            <param name="discard_intron_redundant_transfrags" value="No" />
            <output file="gffcompare_out1.stats" name="transcripts_stats" lines_diff="6" />
            <output file="gffcompare_out1.loci" name="transcripts_loci" />
            <output file="gffcompare_out1.tracking" name="transcripts_tracking" />
            <output file="gffcompare_out1.gtf" name="transcripts_combined" />
        </test>
        <!-- One query file plus a reference annotation: expects the annotated GTF. -->
        <test>
            <param ftype="gtf" name="inputs" value="gffcompare_in4.gtf" />
            <param name="use_ref_annotation" value="Yes" />
            <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" />
            <param name="ignore_nonoverlapping_reference" value="Yes" />
            <param name="ignore_nonoverlapping_transfrags" value="No" />
            <param name="use_seq_data" value="No" />
            <param name="discard_single_exon" value="" />
            <param name="max_dist_exon" value="100" />
            <param name="max_dist_group" value="100" />
            <param name="discard_intron_redundant_transfrags" value="No" />
            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
            <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" />
            <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
            <output file="gffcompare_out2.gtf" name="transcripts_annotated" />
        </test>
    </tests>
    <help>
<![CDATA[
**GffCompare Overview**

GffCompare can be used to:

* compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, Stringtie).
* collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulted from assembly of different samples)
* classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in an
  annotation file (also in GTF/GFF3 format)

The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
(see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
are supported by GffCompare, while new features will likely be added to GffCompare in the future.

A notable difference from CuffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus
be used as a simple way of annotating a set of transcripts.

Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
and is simply ignored when given).
]]>
    </help>
    <citations>
        <citation type="doi">10.1038/nbt.1621</citation>
    </citations>
</tool>