view get_length_and_gc_content.xml @ 1:f088370d2a3c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 0a56599c36b4968095ec5a3cb589f94fb139466c
author iuc
date Sun, 28 Jan 2018 04:04:58 -0500
parents 2ca1baabdae0
children e3ba567abdf5
line wrap: on
line source

<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.1">
    <description>from GTF and FASTA file</description>
    <!-- Pinned package dependencies: the R libraries whose versions are reported
         by the version command of this tool. -->
    <requirements>
        <requirement version="1.3.2" type="package">r-optparse</requirement>
        <requirement version="1.4.2" type="package">r-reshape2</requirement>
        <requirement version="1.10.4" type="package">r-data.table</requirement>
        <requirement version="1.34.2" type="package">bioconductor-rtracklayer</requirement>
    </requirements>
    <!-- Error detection: the job is flagged as failed when the command exits with
         a non-zero status, or when stdout/stderr contains one of the patterns
         listed below. -->
    <stdio>
        <!-- Catches failures that print none of the patterns below, for example a
             failing `ln -s` step that stops the command chain before R starts. -->
        <exit_code range="1:"
                   level="fatal"
                   description="The tool exited with a non-zero exit code." />
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Prints a single line: the R version string followed by the versions of
         optparse, reshape2, rtracklayer and data.table. Each package version is
         read from sessionInfo() in its own R session; stderr is discarded and any
         output line containing "WARNING: " (case-insensitive) is filtered out. -->
    <version_command><![CDATA[
        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\$otherPkgs\$reshape2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <!-- Command template (Cheetah). Steps, chained with the shell AND operator so
         that a failing step stops the chain:
           1. symlink the selected GTF (built-in path or history dataset) as `gtf`;
           2. symlink the selected FASTA (built-in path or history dataset) as `fasta`;
           3. run get_length_and_gc_content.r from the tool directory on both links.
         The length and gc_content output options are passed to the script only
         when the matching boolean input (length_out / gc_out) is enabled. -->
    <command><![CDATA[

## Get GTF

#if $gtf_file.gtfSource == 'cached':
    ln -s '$gtf_file.gtf_pre_installed.fields.path' gtf
#else:
    ln -s '$gtf_file.gtf_history' gtf
#end if

&&

## Get FASTA

#if $fasta_file.fastaSource == 'indexed':
    ln -s '$fasta_file.fasta_pre_installed.fields.path' fasta
#else:
    ln -s '$fasta_file.fasta_history' fasta
#end if

&&

Rscript '$__tool_directory__/get_length_and_gc_content.r'

--gtf gtf
--fasta fasta

#if $length_out:
    --length '$length'
#end if

#if $gc_out:
    --gc_content '$gc_content'
#end if

    ]]></command>

    <inputs>
        <!-- Annotation source: a built-in GTF listed in the gene_sets data table,
             or a GTF dataset from the history. -->
        <conditional name="gtf_file">
            <param name="gtfSource" type="select" label="Select a built-in GTF file or one from your history" help="Choose history if you don't see the correct GTF">
                <option value="cached" selected="true">Use a built-in GTF</option>
                <option value="history">Use a GTF from history</option>
            </param>
            <when value="cached">
                <param name="gtf_pre_installed" type="select" label="Select a GTF file" help="Select the GTF from a list of pre-installed files">
                    <options from_data_table="gene_sets">
                        <filter type="sort_by" column="1" />
                    </options>
                    <validator type="no_options" message="No annotations are available."/>
                </param>
            </when>
            <when value="history">
                <param name="gtf_history" type="data" format="gtf" label="Select a GTF file" help="Make sure that the GTF corresponds to the same genome as the FASTA"/>
            </when>
        </conditional>

        <!-- Sequence source: a built-in FASTA listed in the all_fasta data table,
             or a FASTA dataset from the history. -->
        <conditional name="fasta_file">
            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
                <option value="indexed" selected="true">Use a built-in FASTA</option>
                <option value="history">Use a FASTA from history</option>
            </param>
            <when value="indexed">
                <param name="fasta_pre_installed" type="select" label="Select a FASTA file" help="Select the FASTA file from a list of pre-installed genomes">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                    </options>
                    <!-- The list is not filtered by any other input, so the message
                         only states that the data table offers no entries. -->
                    <validator type="no_options" message="No built-in FASTA files are available."/>
                </param>
            </when>
            <when value="history">
                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
            </when>
        </conditional>

        <!-- Output switches: each one controls whether the matching output dataset
             is produced and whether its option is passed to the R script. -->
        <param name="length_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output length file?" help="Default: Yes" />
        <param name="gc_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output GC content file?" help="Default: Yes" />

    </inputs>

    <!-- Each output dataset is kept only when its matching boolean input is
         enabled; column names are attached as metadata. -->
    <outputs>
        <!-- Per-gene length table -->
        <data format="tabular" name="length" label="Gene length">
            <filter>length_out is True</filter>
            <actions>
                <action type="metadata" name="column_names" default="GeneID,Length" />
            </actions>
        </data>
        <!-- Per-gene GC content table -->
        <data format="tabular" name="gc_content" label="Gene GC content">
            <filter>gc_out is True</filter>
            <actions>
                <action type="metadata" name="column_names" default="GeneID,GC_content" />
            </actions>
        </data>
    </outputs>

    <tests>
        <!-- GTF and FASTA supplied from the history: both the length and the GC
             content outputs are expected -->
        <test expect_num_outputs="2">
            <param name="gtfSource" value="history" />
            <param name="gtf_history" ftype="gtf" value="in.gtf" />
            <param name="fastaSource" value="history" />
            <param name="fasta_history" ftype="fasta" value="in.fasta" />
            <output name="length" file="length.tab" />
            <output name="gc_content" file="gc.tab" />
        </test>
        <!-- Built-in GTF and FASTA sources (no explicit entry is selected from
             either list): both outputs are expected -->
        <test expect_num_outputs="2">
            <param name="gtfSource" value="cached" />
            <param name="fastaSource" value="indexed" />
            <output name="length" file="length.tab" />
            <output name="gc_content" file="gc.tab" />
        </test>
        <!-- GC content output switched off: only the length output is expected -->
        <test expect_num_outputs="1">
            <param name="gtfSource" value="cached" />
            <param name="fastaSource" value="indexed" />
            <param name="gc_out" value="False" />
            <output name="length" file="length.tab" />
        </test>
        <!-- Length output switched off: only the GC content output is expected -->
        <test expect_num_outputs="1">
            <param name="gtfSource" value="cached" />
            <param name="fastaSource" value="indexed" />
            <param name="length_out" value="False" />
            <output name="gc_content" file="gc.tab" />
        </test>
    </tests>
    <help><![CDATA[

**What it does**

.. class:: infomark

This tool calculates the length and GC content of each gene in a GTF file. It requires a FASTA file from the same genome version as the GTF.

-----

**Inputs**

- a GTF file
- a FASTA file

-----

**Outputs**

- a tabular file with Gene ID and length
- a tabular file with Gene ID and GC content

-----

**More Information**

To calculate gene length, this tool counts the number of bases in all exons of a gene, after merging any overlapping exons from different transcripts.

    ]]></help>
    <!-- No citations are declared for this tool. -->
    <citations />
</tool>