changeset 0:5f1cb4c28d73 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sleuth commit 6b943159b4d68812dc6911309f23d54ec659282f
author iuc
date Thu, 01 Jun 2023 07:56:00 +0000
parents
children d3e447dd52c8
files macros.xml sleuth.R sleuth.xml test-data/kallisto_output_01.h5 test-data/kallisto_output_02.h5 test-data/kallisto_output_03.h5 test-data/kallisto_output_04.h5 test-data/test01_density.pdf test-data/test01_pca.pdf
diffstat 9 files changed, 267 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 01 07:56:00 2023 +0000
@@ -0,0 +1,23 @@
+<macros>
+    <!-- Packages required by the tool; sleuth.R loads sleuth, annotables, argparse and tidyverse. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">r-sleuth</requirement>
+            <requirement type="package" version="0.2.0">r-annotables</requirement>
+            <requirement type="package" version="2.2.1">r-argparse</requirement>
+            <requirement type="package" version="2.0.0">r-tidyverse</requirement>
+        </requirements>
+    </xml>
+    <!-- Version tokens: sleuth.xml builds its version attribute from @TOOL_VERSION@ and @SUFFIX_VERSION@, and its profile from @PROFILE@. -->
+    <token name="@TOOL_VERSION@">0.30.1</token>
+    <token name="@SUFFIX_VERSION@">0</token>
+    <token name="@PROFILE@">20.01</token>
+    <!-- Citation for the tool, given as a DOI. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/nmeth.4324</citation>
+        </citations>
+    </xml>
+    <!-- Cross-reference to the bio.tools entry. -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">sleuth</xref>
+        </xrefs>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sleuth.R	Thu Jun 01 07:56:00 2023 +0000
@@ -0,0 +1,116 @@
+library(sleuth,
+        quietly = TRUE,
+        warn.conflicts = FALSE)
+library(annotables, quietly = TRUE, warn.conflicts = FALSE)
+library(argparse, quietly = TRUE, warn.conflicts = FALSE)
+library(tidyverse)
+
+
+# Send R error messages to stderr and quit with exit status 1 on any error.
+options(
+  show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+  }
+)
+
+# Force English messages so a German LC setting cannot crash Galaxy with a UTF-8 error.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+################################################################################
+### Input Processing
+################################################################################
+
+
+# Declare the command-line interface and parse it into `args`.
+cli <- ArgumentParser(description = "Sleuth R script")
+
+# Repeatable flags: factor level names, the per-file count paths and the
+# number of files per level. All three are mandatory.
+for (flag in c("--factorLevel", "--factorLevel_counts", "--factorLevel_n")) {
+  cli$add_argument(flag, action = "append", required = TRUE)
+}
+
+# Number of cores handed to sleuth_prep().
+cli$add_argument("--cores", type = "integer", required = TRUE)
+
+# Optional switch; when absent the data are not normalized.
+cli$add_argument("--normalize", action = "store_true", required = FALSE)
+
+# Shrinkage settings forwarded to sleuth_fit(): number of bins and the
+# lower/upper range of variances used within each bin.
+cli$add_argument("--nbins", type = "integer", required = TRUE)
+cli$add_argument("--lwr", type = "numeric", required = TRUE)
+cli$add_argument("--upr", type = "numeric", required = TRUE)
+
+args <- cli$parse_args()
+
+# Count files, listed level by level in the order given on the command line.
+all_files <- args$factorLevel_counts
+
+# Repeat each factor level once per file of that level (--factorLevel_n) so
+# that `conditions` lines up element-wise with `all_files`. A bare rep()
+# produced a single entry per level, which data.frame() then recycled across
+# the samples and attached conditions to the wrong files.
+conditions <- rep(unlist(args$factorLevel),
+                  times = as.integer(unlist(args$factorLevel_n)))
+
+# Fail early if the wrapper passed inconsistent counts instead of silently
+# recycling values in the design table.
+stopifnot(length(conditions) == length(all_files))
+
+# NOTE(review): the wrapper links inputs as *.h5, so this ".tab" pattern
+# looks like a no-op for them -- confirm before changing sample names.
+sample_names <- str_replace(all_files, pattern = "\\.tab", "")
+
+# Sample table handed to sleuth_prep(): one row per count file.
+design <- data.frame(
+  sample = sample_names,
+  condition = conditions,
+  path = all_files
+)
+# Build the sleuth object from the sample table.
+so <- sleuth_prep(design, cores = args$cores, normalize = args$normalize)
+
+# Both models use the same shrinkage settings, so fit them through one
+# small wrapper around sleuth_fit().
+fit_model <- function(obj, model_formula, model_name) {
+  sleuth_fit(
+    obj,
+    model_formula,
+    model_name,
+    n_bins = args$nbins,
+    lwr = args$lwr,
+    upr = args$upr
+  )
+}
+
+so <- fit_model(so, ~ condition, "full")
+so <- fit_model(so, ~ 1, "reduced")
+
+# Likelihood ratio test of the reduced model against the full model.
+so <- sleuth_lrt(so, "reduced", "full")
+
+lrt_results <- sleuth_results(so, "reduced:full", "lrt", show_all = FALSE)
+
+# Tab-separated results table with a header row and no row names.
+write.table(
+  lrt_results,
+  file = "sleuth_table.tab",
+  sep = "\t",
+  quote = FALSE,
+  row.names = FALSE,
+  col.names = TRUE
+)
+
+
+# PCA plot coloured by condition, written to pca_plot.pdf in the working
+# directory. The plot call stays at top level so its result is printed to
+# the open PDF device.
+pca_pdf <- file.path(getwd(), "pca_plot.pdf")
+pdf(file = pca_pdf, width = 9, height = 6)
+plot_pca(so, color_by = "condition")
+dev.off()
+
+# Group density plot written to group_density.pdf; every covariate column
+# except "sample" is used for grouping.
+density_pdf <- file.path(getwd(), "group_density.pdf")
+grouping_cols <- setdiff(colnames(so$sample_to_covariates), "sample")
+pdf(file = density_pdf, width = 9, height = 6)
+plot_group_density(
+  so,
+  use_filtered = TRUE,
+  units = "est_counts",
+  trans = "log",
+  grouping = grouping_cols,
+  offset = 1
+)
+dev.off()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sleuth.xml	Thu Jun 01 07:56:00 2023 +0000
@@ -0,0 +1,128 @@
+<tool id="sleuth" name="Sleuth" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
+    <description>differential expression analysis</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro='xrefs'/>
+    <expand macro='requirements'/>
+    <stdio>
+        <!-- Treat the job as failed when any of these R error markers appears on stdout or stderr. -->
+        <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+        <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <version_command><![CDATA[echo $(R --version | grep version | grep -v GNU)", sleuth version" $(R --vanilla --slave -e "library(sleuth); cat(sessionInfo()\$otherPkgs\$sleuth\$Version)" 2> /dev/null | grep -v -i "WARNING: ")]]></version_command>
+    <!-- Symlinks every counts file to LEVEL_INDEX.h5 in the working directory, then runs sleuth.R with one
+         factorLevel / factorLevel_n pair per level and one factorLevel_counts entry per linked file. -->
+    <command><![CDATA[
+        #set $factor_levels = list()
+        #set $cond_files = list()
+        #set $cond_n_files = list()
+        #for $level in $rep_factorLevel
+            $factor_levels.append(str($level.factorLevel))
+            $cond_n_files.append(len(str($level.countsFile).split(",")))
+            #for $i, $count in enumerate(str($level.countsFile).split(","))
+                #set $fname = str($level.factorLevel) + "_"  + str($i) + '.h5'
+                ln -s '${count}' "${fname}" &&
+                $cond_files.append($fname)
+            #end for
+        #end for
+        Rscript '${__tool_directory__}/sleuth.R'
+            #for $i, $factor in enumerate($factor_levels)
+                --factorLevel $factor
+                --factorLevel_n $cond_n_files[$i]
+            #end for
+            #for $file in $cond_files
+                --factorLevel_counts $file
+            #end for
+            --cores  \${GALAXY_SLOTS:-4}
+            $advanced_options.normalization
+            --nbins $advanced_options.nbins
+            --lwr $advanced_options.lwr
+            --upr $advanced_options.upr
+    ]]></command>
+    <inputs>
+        <!-- One repeat block per factor level (at least two); each block holds the level name and its h5 counts files. -->
+        <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
+            <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
+                help="Only letters, numbers and underscores will be retained in this field">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <param name="countsFile" type="data" format="h5" multiple="true" label="Counts file(s)"/>
+        </repeat>
+        <!-- Options passed straight through to sleuth.R as normalize, nbins, lwr and upr. -->
+        <section name="advanced_options" title="Advanced options" expanded="true">
+            <param argument="normalization" type="boolean" truevalue="--normalize" falsevalue="" checked="true" label="Normalize data" 
+                help="If this is set to false, bootstraps will not be read and transformation of the data will not be done. This should 
+                    only be set to false if one desires to do a quick check of the raw data. " />
+            <param argument="nbins" type="integer" min="0" value="100" label="NBins" help="The number of bins that the data should be 
+                split for the sliding window shrinkage using the mean-variance curve." />
+            <param argument="lwr" type="float" min="0" max="1" value="0.25" label="LWR" help="The lower range of variances within each 
+                bin that should be included for the shrinkage procedure. " />
+            <param argument="upr" type="float" min="0" max="1" value="0.75" label="UPR" help="The upper range of variances within each 
+                bin that should be included for the shrinkage procedure." />
+        </section>
+    </inputs>
+    <outputs>
+        <!-- All three outputs are picked up from fixed file names that sleuth.R writes into the working directory. -->
+        <data name="sleuth_table" from_work_dir="sleuth_table.tab" format="tabular" label="${tool.name} on ${on_string}: DE table">
+            <actions>
+                <action name="column_names" type="metadata" default="target_id,pval,qval,test_stat,rss,degrees_free,mean_obs,var_obs,tech_var,sigma_sq,smooth_sigma_sq,final_sigma_sq" />
+            </actions>
+        </data>
+        <data name="pca_plot" from_work_dir="pca_plot.pdf" format="pdf" label="${tool.name} on ${on_string}: PCA plot"/>
+        <data name="density_plot" from_work_dir="group_density.pdf" format="pdf" label="${tool.name} on ${on_string}: density plot"/>
+    </outputs>
+    <tests>
+        <!-- Two levels with two counts files each, default advanced options.
+             NOTE(review): has_size pins the table produced by the current sleuth.R; regenerate it if the
+             script's sample-to-condition assignment changes. -->
+        <test expect_num_outputs="3">
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Control"/>
+                <param name="countsFile" value="kallisto_output_01.h5,kallisto_output_02.h5"/>
+            </repeat>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Cancer"/>
+                <param name="countsFile" value="kallisto_output_03.h5,kallisto_output_04.h5"/>
+            </repeat>
+            <section name="advanced_options">
+                <param name="normalization" value="true"/>
+                <param name="nbins" value="100"/>
+                <param name="lwr" value="0.25"/>
+                <param name="upr" value="0.75"/>
+            </section>
+            <output name="sleuth_table" ftype="tabular">
+                <assert_contents>
+                    <has_size value="689791" delta="100"/>
+                    <has_text text="ENST00000281092.9"/>
+                    <has_text text="ENST00000700211.1"/>
+                </assert_contents>
+            </output>
+            <output name="pca_plot" file="test01_pca.pdf" ftype="pdf" compare="sim_size"/>
+            <output name="density_plot" file="test01_density.pdf" ftype="pdf" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**Purpose**
+
+Sleuth is a tool for the analysis and comparison of multiple related RNA-Seq experiments. Key features include:
+
+- The ability to perform both transcript-level and gene-level analysis.
+- Compatibility with kallisto enabling a fast and accurate workflow from reads to results.
+- The use of bootstraps to ascertain and correct for technical variation in experiments.
+- An interactive app for exploratory data analysis.
+
+To use sleuth, RNA-Seq data must first be quantified with kallisto, which is a program for very fast RNA-Seq quantification based on 
+pseudo-alignment. An important feature of kallisto is that it outputs bootstraps along with the estimates of transcript abundances. 
+These can serve as proxies for technical replicates, allowing for an ascertainment of the variability in estimates due to the random 
+processes underlying RNA-Seq as well as the statistical procedure of read assignment. 
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
Binary file test-data/kallisto_output_01.h5 has changed
Binary file test-data/kallisto_output_02.h5 has changed
Binary file test-data/kallisto_output_03.h5 has changed
Binary file test-data/kallisto_output_04.h5 has changed
Binary file test-data/test01_density.pdf has changed
Binary file test-data/test01_pca.pdf has changed