view sleuth.xml @ 0:5f1cb4c28d73 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sleuth commit 6b943159b4d68812dc6911309f23d54ec659282f
author iuc
date Thu, 01 Jun 2023 07:56:00 +0000
parents
children d3e447dd52c8
line wrap: on
line source

<tool id="sleuth" name="Sleuth" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
    <description>differential expression analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='xrefs'/>
    <expand macro='requirements'/>
    <stdio>
        <!-- Each pattern is searched in both stdout and stderr; a hit is reported at the fatal level. -->
        <!-- Literal text "Execution halted". -->
        <regex level="fatal" source="both" match="Execution halted"
               description="Execution halted." />
        <!-- Literal text "Error in"; reported with a generic message. -->
        <regex level="fatal" source="both" match="Error in"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <!-- Literal text "Fatal error"; reported with the same generic message. -->
        <regex level="fatal" source="both" match="Fatal error"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Prints a single version line: the line(s) of R's version banner that contain "version" but not "GNU",
         then the literal text ", sleuth version", then the sleuth package version read from sessionInfo().
         stderr of the second R call is redirected to /dev/null, and output lines containing "WARNING: "
         (matched case-insensitively) are dropped. -->
    <version_command><![CDATA[echo $(R --version | grep version | grep -v GNU)", sleuth version" $(R --vanilla --slave -e "library(sleuth); cat(sessionInfo()\$otherPkgs\$sleuth\$Version)" 2> /dev/null | grep -v -i "WARNING: ")]]></version_command>
    <command><![CDATA[
        ## Walk the factor-level repeat once and collect three parallel pieces of data:
        ##   factor_levels - the level names, in input order
        ##   cond_n_files  - how many count files belong to each level
        ##   cond_files    - the symlink names created in the working directory
        #set $factor_levels = list()
        #set $cond_files = list()
        #set $cond_n_files = list()
        #for $level in $rep_factorLevel
            #set $level_name = str($level.factorLevel)
            ## str() of the multiple-dataset parameter is split on commas into individual paths;
            ## do it once per level instead of once per use.
            #set $count_paths = str($level.countsFile).split(",")
            ## "#silent" evaluates the call without writing its return value into the command line.
            #silent $factor_levels.append($level_name)
            #silent $cond_n_files.append(len($count_paths))
            #for $i, $count in enumerate($count_paths)
                ## Link every input as <level>_<index>.h5 so the file name carries its condition.
                #set $fname = $level_name + "_" + str($i) + '.h5'
                ln -s '${count}' '${fname}' &&
                #silent $cond_files.append($fname)
            #end for
        #end for
        Rscript '${__tool_directory__}/sleuth.R'
            ## Quote interpolated values so an empty or unexpected level name
            ## cannot swallow the following option or split into several words.
            #for $i, $factor in enumerate($factor_levels)
                --factorLevel '${factor}'
                --factorLevel_n $cond_n_files[$i]
            #end for
            #for $file in $cond_files
                --factorLevel_counts '${file}'
            #end for
            ## Falls back to 4 cores when GALAXY_SLOTS is unset or empty.
            --cores \${GALAXY_SLOTS:-4}
            ## Expands to "--normalize" or to nothing, depending on the checkbox.
            $advanced_options.normalization
            --nbins $advanced_options.nbins
            --lwr $advanced_options.lwr
            --upr $advanced_options.upr
    ]]></command>
    <inputs>
        <!-- One repeat block per factor level (condition); min="2" enforces at least two levels. -->
        <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
            <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
                help="Only letters, numbers and underscores will be retained in this field">
                <sanitizer>
                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
                </sanitizer>
                <!-- An empty name would produce an option without a value on the command line
                     and symlinks named "_0.h5", so reject it up front. -->
                <validator type="empty_field" message="Please provide a name for this factor level" />
            </param>
            <!-- One or more HDF5 count files belonging to this factor level. -->
            <param name="countsFile" type="data" format="h5" multiple="true" label="Counts file(s)"/>
        </repeat>
        <!-- Help texts are kept on a single line each: line breaks inside an attribute value
             are turned into spaces by the XML parser, which would embed the indentation in the text. -->
        <section name="advanced_options" title="Advanced options" expanded="true">
            <param argument="normalization" type="boolean" truevalue="--normalize" falsevalue="" checked="true"
                label="Normalize data"
                help="If this is set to false, bootstraps will not be read and transformation of the data will not be done. This should only be set to false if one desires to do a quick check of the raw data." />
            <!-- At least one bin is needed to partition the data, hence min="1". -->
            <param argument="nbins" type="integer" min="1" value="100"
                label="NBins"
                help="The number of bins that the data should be split into for the sliding window shrinkage using the mean-variance curve." />
            <param argument="lwr" type="float" min="0" max="1" value="0.25"
                label="LWR"
                help="The lower range of variances within each bin that should be included for the shrinkage procedure." />
            <param argument="upr" type="float" min="0" max="1" value="0.75"
                label="UPR"
                help="The upper range of variances within each bin that should be included for the shrinkage procedure." />
        </section>
    </inputs>
    <outputs>
        <!-- Differential expression table, collected from sleuth_table.tab in the job working directory. -->
        <data name="sleuth_table"
              format="tabular"
              from_work_dir="sleuth_table.tab"
              label="${tool.name} on ${on_string}: DE table">
            <actions>
                <!-- Sets the column_names metadata of the resulting dataset. -->
                <action type="metadata"
                        name="column_names"
                        default="target_id,pval,qval,test_stat,rss,degrees_free,mean_obs,var_obs,tech_var,sigma_sq,smooth_sigma_sq,final_sigma_sq" />
            </actions>
        </data>
        <!-- PCA plot, collected from pca_plot.pdf. -->
        <data name="pca_plot"
              format="pdf"
              from_work_dir="pca_plot.pdf"
              label="${tool.name} on ${on_string}: PCA plot" />
        <!-- Density plot, collected from group_density.pdf. -->
        <data name="density_plot"
              format="pdf"
              from_work_dir="group_density.pdf"
              label="${tool.name} on ${on_string}: density plot" />
    </outputs>
    <tests>
        <!-- Two factor levels with two count files each; advanced options set to their declared defaults. -->
        <test expect_num_outputs="3">
            <!-- First factor level. -->
            <repeat name="rep_factorLevel">
                <param value="Control" name="factorLevel" />
                <param value="kallisto_output_01.h5,kallisto_output_02.h5" name="countsFile" />
            </repeat>
            <!-- Second factor level. -->
            <repeat name="rep_factorLevel">
                <param value="Cancer" name="factorLevel" />
                <param value="kallisto_output_03.h5,kallisto_output_04.h5" name="countsFile" />
            </repeat>
            <section name="advanced_options">
                <param value="true" name="normalization" />
                <param value="100" name="nbins" />
                <param value="0.25" name="lwr" />
                <param value="0.75" name="upr" />
            </section>
            <!-- The table is checked by approximate size and by the presence of two identifiers. -->
            <output ftype="tabular" name="sleuth_table">
                <assert_contents>
                    <has_size delta="100" value="689791" />
                    <has_text text="ENST00000281092.9" />
                    <has_text text="ENST00000700211.1" />
                </assert_contents>
            </output>
            <!-- Both PDFs are compared against reference files by similar size. -->
            <output ftype="pdf" name="pca_plot" compare="sim_size" file="test01_pca.pdf" />
            <output ftype="pdf" name="density_plot" compare="sim_size" file="test01_density.pdf" />
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

Sleuth is a tool for the analysis and comparison of multiple related RNA-Seq experiments. Key features include:

- The ability to perform both transcript-level and gene-level analysis.
- Compatibility with kallisto enabling a fast and accurate workflow from reads to results.
- The use of bootstraps to ascertain and correct for technical variation in experiments.
- An interactive app for exploratory data analysis.

To use sleuth, RNA-Seq data must first be quantified with kallisto, a program for very fast RNA-Seq quantification based on
pseudo-alignment. An important feature of kallisto is that it outputs bootstraps along with the estimates of transcript abundances.
These bootstraps can serve as proxies for technical replicates, making it possible to assess the variability in the estimates that is
due to the random processes underlying RNA-Seq and to the statistical procedure of read assignment.

    ]]></help>
    <expand macro="citations" />
</tool>