diff get_length_and_gc_content.xml @ 1:f088370d2a3c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 0a56599c36b4968095ec5a3cb589f94fb139466c
author iuc
date Sun, 28 Jan 2018 04:04:58 -0500
parents 2ca1baabdae0
children e3ba567abdf5
line wrap: on
line diff
--- a/get_length_and_gc_content.xml	Thu Nov 17 16:41:06 2016 -0500
+++ b/get_length_and_gc_content.xml	Sun Jan 28 04:04:58 2018 -0500
@@ -1,10 +1,10 @@
-<tool id="length_and_gc_content" name="Gene length and gc content" version="0.1.0">
-    <description>from GTF file</description>
+<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.1">
+    <description>from GTF and FASTA file</description>
     <requirements>
         <requirement type="package" version="1.3.2">r-optparse</requirement>
-        <requirement type="package" version="1.4.1">r-reshape2</requirement>
-        <requirement type="package" version="1.9.6">r-data.table</requirement>
-        <requirement type="package" version="1.34.1">bioconductor-rtracklayer</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="1.10.4">r-data.table</requirement>
+        <requirement type="package" version="1.34.2">bioconductor-rtracklayer</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -20,70 +20,165 @@
                level="fatal"
                description="An undefined error occured, please check your input carefully and contact your administrator." />
     </stdio>
+    <version_command><![CDATA[
+        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\$otherPkgs\$reshape2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
     <command><![CDATA[
-        Rscript '$__tool_directory__'/get_length_and_gc_content.r --gtf '$gtf'
-        #if $fastaSource.genomeSource == 'indexed':
-            --fasta '$fastaSource.fasta_pre_installed.fields.path'
-        #else:
-            --fasta '$fastaSource.fasta_history'
-        #end if
-        --length '$length'
-        --gc_content '$gc_content'
+
+## Get GTF
+
+#if $gtf_file.gtfSource == 'cached':
+    ln -s '$gtf_file.gtf_pre_installed.fields.path' gtf
+#else:
+    ln -s '$gtf_file.gtf_history' gtf
+#end if
+
+&&
+
+## Get FASTA
+
+#if $fasta_file.fastaSource == 'indexed':
+    ln -s '$fasta_file.fasta_pre_installed.fields.path' fasta
+#else:
+    ln -s '$fasta_file.fasta_history' fasta
+#end if
+
+&&
+
+Rscript '$__tool_directory__/get_length_and_gc_content.r'
+
+--gtf gtf
+--fasta fasta
+
+#if $length_out:
+    --length '$length'
+#end if
+
+#if $gc_out:
+    --gc_content '$gc_content'
+#end if
+
     ]]></command>
+
     <inputs>
-        <param name="gtf" type="data" format="gtf" help="The GTF must match the FASTA file" label="GTF file for length and GC calculation"/>
-        <conditional name="fastaSource">
-            <param help="choose history if you don't see the correct genome fasta" label="Select a reference fasta from your history or use a built-in fasta?" name="genomeSource" type="select">
-                <option value="indexed">Use a built-in fasta</option>
-                <option value="history">Use fasta from history</option>
+        <conditional name="gtf_file">
+            <param name="gtfSource" type="select" label="Select a built-in GTF file or one from your history"  help="Choose history if you don't see the correct GTF" >
+                <option value="cached" selected="true">Use a built-in GTF</option>
+                <option value="history">Use a GTF from history</option>
+            </param>
+            <when value="cached">
+                <param name="gtf_pre_installed" type="select" label="Select a GTF file" help="Select the GTF from a list of pre-installed files">
+                    <options from_data_table="gene_sets">
+                        <filter type="sort_by" column="1" />
+                    </options>
+                    <validator type="no_options" message="No annotations are available."/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="gtf_history" type="data" format="gtf" label="Select a GTF file" help="Make sure that the GTF corresponds to the same genome as the FASTA"/>
+            </when>
+        </conditional>
+
+        <conditional name="fasta_file">
+            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
+                <option value="indexed" selected="true">Use a built-in FASTA </option>
+                <option value="history">Use a FASTA from history</option>
             </param>
             <when value="indexed">
-                <param name="fasta_pre_installed" type="select" help="Select the fasta file from a list of pre-installed genomes" label="Select a fasta sequence">
+                <param name="fasta_pre_installed" type="select" help="Select the FASTA file from a list of pre-installed genomes" label="Select a FASTA file">
                     <options from_data_table="all_fasta">
-                        <filter type="data_meta" key="dbkey" ref="gtf" column="0"/>
+                        <filter type="sort_by" column="2" />
                     </options>
                     <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                 </param>
             </when>
-        <when value="history">
-            <param name="fasta_history" type="data" format="fasta" label="Select a fasta file that matches the supplied GTF file">
-                <options>
-                    <filter type="data_meta" key="dbkey" ref="gtf"/>
-                </options>
-                <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
-            </param>
-        </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
+            </when>
         </conditional>
+
+
+        <param name="length_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output length file?" help="Default: Yes" />
+        <param name="gc_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output GC content file?" help="Default: Yes" />
+
     </inputs>
+
     <outputs>
-        <data name="length" format="tabular" label="gene length">
+        <data name="length" format="tabular" label="Gene length">
+            <filter>length_out is True</filter>
             <actions>
-                <action name="column_names" type="metadata" default="gene,length" />
+                <action name="column_names" type="metadata" default="GeneID,Length" />
             </actions>
         </data>
-        <data name="gc_content" format="tabular" label="gene gc content">
+        <data name="gc_content" format="tabular" label="Gene GC content">
+            <filter>gc_out is True</filter>
              <actions>
-                <action name="column_names" type="metadata" default="gene,gc_content" />
+                <action name="column_names" type="metadata" default="GeneID,GC_content" />
             </actions>
         </data>
     </outputs>
+
     <tests>
-        <test>
-            <param name="gtf" value="in.gtf" ftype="gtf"></param>
-            <param name="fastaSource|genomeSource" value="history"></param>
-            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>
-            <output name="length" file="length.tab"></output>
-            <output name="gc_content" file="gc.tab"></output>
+        <!-- Ensure length and GC files are output -->
+        <test expect_num_outputs="2">
+            <param name="gtfSource" value="history" />
+            <param name="gtf_history" ftype="gtf" value="in.gtf" />
+            <param name="fastaSource" value="history" />
+            <param name="fasta_history" ftype="fasta" value="in.fasta" />
+            <output name="length" file="length.tab" />
+            <output name="gc_content" file="gc.tab" />
+        </test>
+        <!-- Ensure built-in fasta and gtf work -->
+        <test expect_num_outputs="2">
+            <param name="gtfSource" value="cached" />
+            <param name="fastaSource" value="indexed" />
+            <output name="length" file="length.tab" />
+            <output name="gc_content" file="gc.tab" />
+        </test>
+        <!-- Ensure optional gc content works  -->
+        <test expect_num_outputs="1">
+            <param name="gtfSource" value="cached" />
+            <param name="fastaSource" value="indexed" />
+            <param name="gc_out" value="False" />
+            <output name="length" file="length.tab" />
+        </test>
+        <!-- Ensure optional length works -->
+        <test expect_num_outputs="1">
+            <param name="gtfSource" value="cached" />
+            <param name="fastaSource" value="indexed" />
+            <param name="length_out" value="False" />
+            <output name="gc_content" file="gc.tab" />
         </test>
     </tests>
-    <help>
+    <help><![CDATA[
+
+**What it does**
+
+.. class:: infomark
 
-        **What it does**
+This tool calculates the length and GC content for the genes in a GTF file. It requires a FASTA file that is the same genome version as the GTF.
+
+-----
+
+**Inputs**
+
+- a GTF file
+- a FASTA file
 
-        Returns a tabular file with gene id and length and a tabular file with gene id and GC content, based on a supplied GTF and a FASTA file.
+-----
+
+**Outputs**
+
+- a tabular file with Gene ID and length
+- a tabular file with Gene ID and GC content
 
+-----
 
-        </help>
+**More Information**
+
+To calculate gene length, this tool counts the number of bases in all exons of a gene, after merging any overlapping exons from different transcripts.
+
+    ]]></help>
     <citations>
     </citations>
 </tool>