diff cuffcompare_wrapper.xml @ 6:8e534225baa9 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:55:55 -0500
parents 8b22e9adae34
children b77178f66fc3
line wrap: on
line diff
--- a/cuffcompare_wrapper.xml	Thu Jan 09 14:27:37 2014 -0500
+++ b/cuffcompare_wrapper.xml	Fri Dec 19 11:55:55 2014 -0500
@@ -1,19 +1,23 @@
-<tool id="cuffcompare" name="Cuffcompare" version="0.0.6">
-    <!-- Wrapper supports Cuffcompare versions v1.3.0 and newer -->
+<tool id="cuffcompare" name="Cuffcompare" version="2.2.1.0">
     <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
-    <requirements>
-        <requirement type="package" version="2.1.1">cufflinks</requirement>
-    </requirements>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <macros>
+      <import>cuff_macros.xml</import>
+    </macros>
     <version_command>cuffcompare 2>&amp;1 | head -n 1</version_command>
     <command interpreter="python">
         cuffcompare_wrapper.py 
-            
             ## Use annotation reference?
             #if $annotation.use_ref_annotation == "Yes":
                 -r $annotation.reference_annotation
                 #if $annotation.ignore_nonoverlapping_reference:
                     -R
                 #end if
+                #if $annotation.ignore_nonoverlapping_transfrags:
+                    -Q
+                #end if
+
             #end if
             
             ## Use sequence data?
@@ -26,35 +30,38 @@
                 #end if
             #end if
             
+            $discard_single_exon
+
+            -e $max_dist_exon
+            -d $max_dist_group
+
+            #if $discard_intron_redundant_transfrags:
+                -F
+            #end if 
+
             ## Outputs.
             --combined-transcripts=${transcripts_combined}
-            
-            ## Inputs.
-            ${first_input}
-            #for $input_file in $input_files:
-              ${input_file.additional_input}
-            #end for
-            
+
+            @CUFFLINKS_GTF_INPUTS@
     </command>
     <inputs>
-        <param format="gtf" name="first_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        <repeat name="input_files" title="Additional GTF Input Files">
-            <param format="gtf" name="additional_input" type="data" label="GTF file produced by Cufflinks" help=""/>
-        </repeat>
+        <expand macro="cufflinks_gtf_inputs" />
         <conditional name="annotation">
             <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
                 <option value="No">No</option>
                 <option value="Yes">Yes</option>
             </param>
             <when value="Yes">
-                <param format="gff3,gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format."/>    
-                <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any transcript in input files"/>
+                <param format="gff3,gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format."/>
+                <param name="ignore_nonoverlapping_reference" type="boolean" label="Ignore reference transcripts that are not overlapped by any input transfrags" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
+                <param name="ignore_nonoverlapping_transfrags" type="boolean" label="Ignore input transcripts that are not overlapped by any reference transcripts" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
             </when>
             <when value="No">
             </when>
         </conditional>
         <conditional name="seq_data">
-            <param name="use_seq_data" type="select" label="Use Sequence Data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
+            <param name="use_seq_data" type="select" label="Use Sequence Data" 
+                help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
                 <option value="Yes">Yes</option>
                 <option value="No">No</option>
             </param>
@@ -68,7 +75,7 @@
                   <when value="cached">
                     <param name="index" type="select" label="Using reference genome">
                       <options from_data_table="fasta_indexes">
-                        <filter type="data_meta" ref="first_input" key="dbkey" column="1" />
+                        <filter type="data_meta" ref="inputs" key="dbkey" column="1" />
                         <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
                       </options>
                     </param>
@@ -79,28 +86,39 @@
                 </conditional>
             </when>
         </conditional>
+        <param type="select" name="discard_single_exon" label="Discard (ignore) single-exon transcripts">
+                <option value="" selected="True">No</option>
+                <option value="-M">Discard single-exon transfrags and reference transcripts</option>
+                <option value="-N">Discard single-exon reference transcripts</option>
+        </param>
+        <param type="integer" name="max_dist_exon" value="100" label="Max. Distance for assessing exon accuracy" 
+            help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
+        <param type="integer" name="max_dist_group" value="100" label="Max. Distance for transcript grouping" 
+            help="max. distance (range) for grouping transcript start sites. Default: 100" />
+        <param type="boolean" name="discard_intron_redundant_transfrags" label="Discard intron-redundant transfrags sharing the 5' end" 
+            help="Discard intron-redundant transfrags if they share the 5' end (if they differ only at the 3' end)" />
     </inputs>
 
     <outputs>
         <data format="txt" name="transcripts_accuracy" label="${tool.name} on ${on_string}: transcript accuracy" 
             from_work_dir="cc_output.stats" />
-        <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${first_input.hid} tmap file"
+        <data format="tabular" name="input1_tmap" label="${tool.name} on ${on_string}: data ${inputs[0].hid} tmap file"
             from_work_dir="cc_output.input1.tmap" />
         <data format="tabular" name="input1_refmap" 
-              label="${tool.name} on ${on_string}: data ${first_input.hid} refmap file" 
+              label="${tool.name} on ${on_string}: data ${inputs[0].hid} refmap file" 
               from_work_dir="cc_output.input1.refmap">
             <filter>annotation['use_ref_annotation'] == 'Yes'</filter>
         </data>
-        <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} tmap file" from_work_dir="cc_output.input2.tmap">
-            <filter>len( input_files ) >= 1</filter>
+        <data format="tabular" name="input2_tmap" label="${tool.name} on ${on_string}: data ${inputs[1].hid} tmap file" from_work_dir="cc_output.input2.tmap">
+            <filter>@HAS_MULTIPLE_INPUTS@</filter>
         </data>
         <data format="tabular" name="input2_refmap" 
-              label="${tool.name} on ${on_string}: data ${input_files[0]['additional_input'].hid} refmap file" 
+              label="${tool.name} on ${on_string}: data ${inputs[1].hid} refmap file" 
               from_work_dir="cc_output.input2.refmap">
-            <filter>annotation['use_ref_annotation'] == 'Yes' and len( input_files ) >= 1</filter>
+            <filter>annotation['use_ref_annotation'] == 'Yes' and @HAS_MULTIPLE_INPUTS@</filter>
         </data>
         <data format="tabular" name="transcripts_tracking" label="${tool.name} on ${on_string}: transcript tracking" from_work_dir="cc_output.tracking">
-            <filter>len( input_files ) > 0</filter>
+            <filter>@HAS_MULTIPLE_INPUTS@</filter>
         </data>
         <data format="gtf" name="transcripts_combined" label="${tool.name} on ${on_string}: combined transcripts"/>
     </outputs>
@@ -110,15 +128,19 @@
             cuffcompare -r cuffcompare_in3.gtf -R cuffcompare_in1.gtf cuffcompare_in2.gtf
         -->
         <test>
-            <param name="first_input" value="cuffcompare_in1.gtf" ftype="gtf"/>
-            <param name="additional_input" value="cuffcompare_in2.gtf" ftype="gtf"/>
+            <param name="inputs" value="cuffcompare_in1.gtf,cuffcompare_in2.gtf" ftype="gtf"/>
             <param name="use_ref_annotation" value="Yes"/>
             <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/>
             <param name="ignore_nonoverlapping_reference" value="Yes"/>
+            <param name="ignore_nonoverlapping_transfrags" value="No"/>
             <param name="use_seq_data" value="No"/>
+            <param name="discard_single_exon" value="" />
+            <param name="max_dist_exon" value="100" />
+            <param name="max_dist_group" value="100" />
+            <param name="discard_intron_redundant_transfrags" value="No" />
             <!-- Line diffs are the result of different locations for input files; this cannot be fixed as cuffcompare outputs
                 full input path for each input. -->
-            <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="16"/>
+            <output name="transcripts_accuracy" file="cuffcompare_out7.txt" lines_diff="2"/>
             <output name="input1_tmap" file="cuffcompare_out1.tmap"/>
             <output name="input1_refmap" file="cuffcompare_out2.refmap"/>
             <output name="input2_tmap" file="cuffcompare_out3.tmap"/>
@@ -133,8 +155,8 @@
 
 Cuffcompare is part of Cufflinks_. Cuffcompare helps you: (a) compare your assembled transcripts to a reference annotation and (b) track Cufflinks transcripts across multiple experiments (e.g. across a time course). Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
 
-.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
-        
+.. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
+
 ------
 
 **Know what you are doing**
@@ -143,7 +165,7 @@
 
 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
 
-.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffcompare
+.. __: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/
 
 ------
 
@@ -174,7 +196,7 @@
 
 Here's an example of a line from the tracking file::
 
-  TCONS_00000045 XLOC_000023 Tcea|uc007afj.1	j	\
+  TCONS_00000045 XLOC_000023 Tcea|uc007afj.1        j        \
      q1:exp.115|exp.115.0|100|3.061355|0.350242|0.350207 \
      q2:60hr.292|60hr.292.0|100|4.094084|0.000000|0.000000
 
@@ -197,18 +219,18 @@
 
 If you ran cuffcompare with the -r option, tracking rows will contain the following values. If you did not use -r, the rows will all contain "-" in their class code column::
 
-  Priority	 Code	   Description
+  Priority         Code           Description
   ---------------------------------
-  1	         =	       Match
-  2	         c	       Contained	
-  3	         j	       New isoform	
-  4	         e	       A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.	
-  5	         i	       A single exon transcript falling entirely with a reference intron	
-  6	         r	       Repeat. Currently determined by looking at the reference sequence and applied to transcripts where at least 50% of the bases are lower case	
-  7	         p	       Possible polymerase run-on fragment	
-  8	         u	       Unknown, intergenic transcript	
-  9	         o	       Unknown, generic overlap with reference	
-  10             .	       (.tracking file only, indicates multiple classifications)
+  1                 =               Match
+  2                 c               Contained        
+  3                 j               New isoform        
+  4                 e               A single exon transcript overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.        
+  5                 i               A single exon transcript falling entirely within a reference intron
+  6                 r               Repeat. Currently determined by looking at the reference sequence and applied to transcripts where at least 50% of the bases are lower case        
+  7                 p               Possible polymerase run-on fragment        
+  8                 u               Unknown, intergenic transcript        
+  9                 o               Unknown, generic overlap with reference        
+  10                .               (.tracking file only, indicates multiple classifications)
     
 -------
 
@@ -225,4 +247,7 @@
   -r    An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.
   -R    If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts_accuracy file
     </help>
+    <citations>
+        <citation type="doi">10.1038/nbt.1621</citation>
+    </citations>
 </tool>