Mercurial > repos > ethevenot > multivariate
changeset 4:5526f8258e8a draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/multivariate.git commit 0f382a5296aae9bfc77df06b0a5ad493eb3c01f3
| author | ethevenot | 
|---|---|
| date | Wed, 28 Feb 2018 09:59:25 -0500 | 
| parents | e91de3b04320 | 
| children | |
| files | Makefile build.xml multivariate_config.xml multivariate_wrapper.R runit/NA runit/output/figure.pdf runit/output/information.txt test-data/output-sampleMetadata.tsv test/.gitignore test/test-multi | 
| diffstat | 10 files changed, 168 insertions(+), 94 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Wed Feb 28 09:59:25 2018 -0500 @@ -0,0 +1,23 @@ +all: + +test: + test/test-multi + +planemo-venv/bin/planemo: planemo-venv + . planemo-venv/bin/activate && pip install --upgrade pip setuptools + . planemo-venv/bin/activate && pip install planemo + +planemo-venv: + virtualenv planemo-venv + +planemolint: planemo-venv/bin/planemo + . planemo-venv/bin/activate && planemo lint + +planemotest: planemo-venv/bin/planemo + . planemo-venv/bin/activate && planemo test --conda_dependency_resolution --galaxy_branch release_17.05 + +clean: + $(RM) -r $(HOME)/.planemo + $(RM) -r planemo-venv + +.PHONY: all clean test planemolint planemotest
--- a/build.xml Sat Oct 22 03:02:47 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -<project name="multivariate" default="all"> - - <property name="tool.xml" value="multivariate_config.xml"/> - <property name="conda.dir" value="${user.home}/w4m-conda"/> - - <!--~~~ - ~ ALL ~ - ~~~~~--> - - <target name="all"/> - - <!--~~~~ - ~ TEST ~ - ~~~~~--> - - <target name="test" depends="planemo.lint,planemo.test"/> - - <!--~~~~~~~~~~~~ - ~ PLANEMO LINT ~ - ~~~~~~~~~~~~~--> - - <target name="planemo.lint"> - <exec executable="planemo" failonerror="true"> - <arg value="lint"/> - <arg value="${tool.xml}"/> - </exec> - </target> - - <!--~~~~~~~~~~~~ - ~ PLANEMO TEST ~ - ~~~~~~~~~~~~~--> - - <target name="planemo.test" depends="planemo.conda.install"> - <exec executable="planemo" failonerror="true"> - <arg value="test"/> - <arg value="--conda_prefix"/> - <arg value="${conda.dir}"/> - <arg value="--galaxy_branch"/> - <arg value="release_16.07"/> - <arg value="--conda_dependency_resolution"/> - <arg value="${tool.xml}"/> - </exec> - </target> - - <!--~~~~~~~~~~~~~~~~~~~~~ - ~ PLANEMO CONDA INSTALL ~ - ~~~~~~~~~~~~~~~~~~~~~~--> - - <target name="planemo.conda.install" depends="planemo.conda.init"> - <exec executable="planemo" failonerror="true"> - <arg value="conda_install"/> - <arg value="--conda_prefix"/> - <arg value="${conda.dir}"/> - <arg value="${tool.xml}"/> - </exec> - </target> - - <!--~~~~~~~~~~~~~~~~~~ - ~ PLANEMO CONDA INIT ~ - ~~~~~~~~~~~~~~~~~~~--> - - <target name="planemo.conda.init"> - <exec executable="planemo" failonerror="true"> - <arg value="conda_init"/> - <arg value="--conda_prefix"/> - <arg value="${conda.dir}"/> - </exec> - </target> - - <!--~~~~~ - ~ CLEAN ~ - ~~~~~~--> - - <target name="clean"> - <delete dir="${conda.dir}"/> - </target> - -</project>
--- a/multivariate_config.xml Sat Oct 22 03:02:47 2016 -0400 +++ b/multivariate_config.xml Wed Feb 28 09:59:25 2018 -0500 @@ -1,10 +1,9 @@ -<tool id="Multivariate" name="Multivariate" version="2.3.8"> +<tool id="Multivariate" name="Multivariate" version="2.3.10"> <description>PCA, PLS and OPLS</description> <requirements> - <requirement type="package" version="3.3.1">R</requirement> - <requirement type="package">r-batch</requirement> - <requirement type="package">bioconductor-ropls</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + <requirement type="package" version="1.10.0">bioconductor-ropls</requirement> </requirements> <stdio> @@ -45,6 +44,10 @@ variableMetadata_out "$variableMetadata_out" figure "$figure" information "$information" + + #if $save_rdata: + ropls_out "$ropls_out" + #end if ]]></command> <inputs> @@ -181,14 +184,19 @@ </when> </conditional> - + <param name="save_rdata" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save RData in your history" + help="Save the R ropls::opls object in your history for ad hoc analysis and graphing (outside Galaxy, this requires package 'bioconductor-ropls')"/> </inputs> <outputs> <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data> <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data> - <data name="figure" label="${tool.name}__figure.pdf" format="pdf"/> - <data name="information" label="${tool.name}__information.txt" format="txt"/> + <data name="figure" label="${tool.name}_figure.pdf" format="pdf"/> + <data name="information" label="${tool.name}_information.txt" format="txt"/> + <!-- how would an ropls.rdata datatype be added if that were the desired choice here instead? Specifically, could it be created as part of tool installation? --> + <data name="ropls_out" label="${tool.name}_${dataMatrix_in.name}.RData" format="rdata"> + <filter>save_rdata</filter> + </data> </outputs> <tests>
--- a/multivariate_wrapper.R Sat Oct 22 03:02:47 2016 -0400 +++ b/multivariate_wrapper.R Wed Feb 28 09:59:25 2018 -0500 @@ -2,6 +2,41 @@ library(batch) ## parseCommandArgs +# Constants +argv <- commandArgs(trailingOnly = FALSE) +script.path <- sub("--file=","",argv[grep("--file=",argv)]) +prog.name <- basename(script.path) + +# Print help +if (length(grep('-h', argv)) >0) { + cat("Usage:", prog.name, + "dataMatrix_in myDataMatrix.tsv", + "sampleMetadata_in mySampleData.tsv", + "variableMetadata_in myVariableMetadata.tsv", + "respC ...", + "predI ...", + "orthoI ...", + "testL ...", + "typeC ...", + "parAsColC ...", + "parCexN ...", + "parPc1I ...", + "parPc2I ...", + "parMahalC ...", + "parLabVc ...", + "algoC ...", + "crossvalI ...", + "log10L ...", + "permI ...", + "scaleC ...", + "sampleMetadata_out mySampleMetadata_out.tsv", + "variableMetadata_out myVariableMetadata_out.tsv", + "figure figure.pdf", + "information information.txt", + "\n") + quit(status = 0) +} + ######## # MAIN # ######## @@ -45,7 +80,7 @@ if(!tesL) { - sink(NULL) + sink() stpTxtC <- ifelse(is.na(txtC), paste0(tesC, " is FALSE"), txtC) @@ -74,20 +109,23 @@ check.names = FALSE, header = TRUE, row.names = 1, - sep = "\t"))) + sep = "\t", + comment.char = ""))) samDF <- read.table(argVc["sampleMetadata_in"], check.names = FALSE, header = TRUE, row.names = 1, - sep = "\t") + sep = "\t", + comment.char = "") flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") varDF <- read.table(argVc["variableMetadata_in"], check.names = FALSE, header = TRUE, row.names = 1, - sep = "\t") + sep = "\t", + comment.char = "") flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))", @@ -427,8 +465,8 @@ sep = "\t") # Output ropLs -if ( ! is.null(argVc['ropls_out'])) - save(ropLs, file = argVc['ropls_out']) +if (!is.null(argVc['ropls_out']) && !is.na(argVc['ropls_out'])) + save(ropLs, file = argVc['ropls_out']) ## Closing ##-------- @@ -436,6 +474,21 @@ cat("\nEnd of '", modNamC, "' Galaxy module call: ", as.character(Sys.time()), "\n", sep = "") +cat("\n\n\n============================================================================") +cat("\nAdditional information about the call:\n") +cat("\n1) Parameters:\n") +print(cbind(value = argVc)) + +cat("\n2) Session Info:\n") +sessioninfo <- sessionInfo() +cat(sessioninfo$R.version$version.string,"\n") +cat("Main packages:\n") +for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") +cat("Other loaded packages:\n") +for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") + +cat("============================================================================\n") + sink() options(stringsAsFactors = strAsFacL)
--- a/runit/output/information.txt Sat Oct 22 03:02:47 2016 -0400 +++ b/runit/output/information.txt Wed Feb 28 09:59:25 2018 -0500 @@ -1,5 +1,5 @@ -Start of the 'Multivariate' Galaxy module call: Fri 05 Aug 2016 07:03:07 PM +Start of the 'Multivariate' Galaxy module call: Mon 08 Jan 2018 05:24:55 PM PLS-DA @@ -11,4 +11,34 @@ p1 0.0505 0.0505 0.664 0.664 0.4220 0.422 R1 1 p2 0.0678 0.1180 0.105 0.769 -0.0431 0.397 NS 1 -End of 'Multivariate' Galaxy module call: 2016-08-05 19:03:07 +End of 'Multivariate' Galaxy module call: 2018-01-08 17:24:56 + + + +============================================================================ +Additional information about the call: + +1) Parameters: + value +dataMatrix_in "./sacurineTest/dataMatrix.tsv" +sampleMetadata_in "./sacurineTest/sampleMetadata.tsv" +variableMetadata_in "./sacurineTest/variableMetadata.tsv" +sampleMetadata_out "./output/sampleMetadata.tsv" +variableMetadata_out "./output/variableMetadata.tsv" +figure "./output/figure.pdf" +information "./output/information.txt" +respC "gender" +predI "2" +orthoI "0" +testL "TRUE" +parAsColC "none" +parMahalC "gender" +parLabVc "none" + +2) Session Info: +R version 3.3.1 (2016-06-21) +Main packages: +ropls 1.6.2 batch 1.1.4 +Other loaded packages: +parallel 3.3.1 Biobase 2.34.0 methods 3.3.1 BiocGenerics 0.20.0 +============================================================================
--- a/test-data/output-sampleMetadata.tsv Sat Oct 22 03:02:47 2016 -0400 +++ b/test-data/output-sampleMetadata.tsv Wed Feb 28 09:59:25 2018 -0500 @@ -1,5 +1,5 @@ sampleMetadata injectionOrder mode age bmi gender age_OPLS_XSCOR-h1 age_OPLS_XSCOR-o1 age_OPLS_predictions -HU_017 2 pos 41 23.03 M -0.429194614454735 2.37235221029276 38.3893561920685 +HU_017 2 pos 41 23.03 M -0.429194614454734 2.37235221029276 38.3893561920685 HU_028 7 pos 41 23.92 F -0.043550585350191 -0.997753656394117 40.9597290876492 HU_034 9 pos 52 23.37 M 0.470935190870015 1.68258805852438 44.3888507505175 HU_051 20 pos 24 23.23 F -0.30848953411015 3.52230161266618 39.1938729055699
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/.gitignore Wed Feb 28 09:59:25 2018 -0500 @@ -0,0 +1,1 @@ +*.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test-multi Wed Feb 28 09:59:25 2018 -0500 @@ -0,0 +1,37 @@ +#!/bin/bash + +# Constants {{{1 +################################################################ + +PROG_PATH=$(dirname $0) + +# MAIN {{{1 +################################################################ + +$PROG_PATH/../multivariate_wrapper.R dataMatrix_in $PROG_PATH/../test-data/input-dataMatrix.tsv sampleMetadata_in $PROG_PATH/../test-data/input-sampleMetadata.tsv variableMetadata_in $PROG_PATH/../test-data/input-variableMetadata.tsv respC age predI 1 orthoI 1 testL FALSE sampleMetadata_out $PROG_PATH/outputSampleMetadata.tsv variableMetadata_out $PROG_PATH/outputVariableMetadata.tsv + +# Computed numbers are not always the same. We cannot use diff directly on files. + +# 0) Check they have the same number of lines +nlines=$(wc -l <"$PROG_PATH/outputSampleMetadata.tsv") +ref_nlines=$(wc -l <"$PROG_PATH/../test-data/output-sampleMetadata.tsv") +if [ $nlines != $ref_nlines ] ; then + echo "Incorrect output sample metadata." >&2 + exit 1 +fi + +# 1) We check the header line +head -n 1 "$PROG_PATH/outputSampleMetadata.tsv" > "$PROG_PATH/header.tsv" +head -n 1 "$PROG_PATH/../test-data/output-sampleMetadata.tsv" > "$PROG_PATH/ref_header.tsv" +if ! diff "$PROG_PATH/header.tsv" "$PROG_PATH/ref_header.tsv" ; then + echo "Incorrect output sample metadata." >&2 + exit 1 +fi + +# 2) We remove computed values and compare what's left +awk 'BEGIN{FS=OFS="\t"} {$7=$8=$9="";sub("\t\t","\t")}1' "$PROG_PATH/outputSampleMetadata.tsv" > "$PROG_PATH/nocomput.tsv" +awk 'BEGIN{FS=OFS="\t"} {$7=$8=$9="";sub("\t\t","\t")}1' "$PROG_PATH/../test-data/output-sampleMetadata.tsv" > "$PROG_PATH/ref_nocomput.tsv" +if ! diff "$PROG_PATH/nocomput.tsv" "$PROG_PATH/ref_nocomput.tsv" ; then + echo "Incorrect output sample metadata." >&2 + exit 1 +fi
