view w4mclassfilter.xml @ 3:191a720488ce draft

planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit dacc1bae8e60340a431907fe8a8b490357e1bfc0
author eschen42
date Thu, 11 May 2017 00:05:51 -0400
parents 23c6d271def9
children 499c7ecfa834
line wrap: on
line source

<tool id="w4mclassfilter" name="Sample_Subset" version="0.98.1">
  <!-- Galaxy tool definition: filters the W4M data matrix, sample metadata, and variable metadata datasets by sample class by running w4mclassfilter_wrapper.R. -->
  <description>Filter W4M data by sample class</description>

  <!-- Packages that must be available when the tool runs; the w4mclassfilter version matches the version attribute of this tool. -->
  <requirements>
    <requirement version="3.3.1" type="package">r-base</requirement>
    <requirement version="1.1_4" type="package">r-batch</requirement>
    <requirement version="0.98.1" type="package">w4mclassfilter</requirement>
  </requirements>

  <!-- Report any exit code of 1 or greater as a fatal error. -->
  <stdio>
    <exit_code level="fatal" range="1:"/>
  </stdio>


  <!--
    Run the R wrapper script with Rscript, passing every option as a
    "name value" pair of positional arguments.

    The script path and every value are wrapped in single quotes so that a
    tool directory or dataset path containing whitespace stays a single shell
    word, and so that the shell performs no expansion inside user-supplied
    text values.
  -->
  <command detect_errors="aggressive"><![CDATA[
  Rscript '$__tool_directory__/w4mclassfilter_wrapper.R'
  dataMatrix_in '$dataMatrix_in'
  sampleMetadata_in '$sampleMetadata_in'
  variableMetadata_in '$variableMetadata_in'
  sampleclassNames '$sampleclassNames'
  inclusive '$inclusive'
  classnameColumn '$classnameColumn'
  samplenameColumn '$samplenameColumn'
  dataMatrix_out '$dataMatrix_out'
  sampleMetadata_out '$sampleMetadata_out'
  variableMetadata_out '$variableMetadata_out'
  ]]></command>

  <!-- User-facing parameters: three tabular input datasets followed by the class-filter settings. -->
  <inputs>
    <!-- Input datasets -->
    <param name="dataMatrix_in" type="data" format="tabular"
           label="Data matrix file"
           help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab"/>
    <param name="sampleMetadata_in" type="data" format="tabular"
           label="Sample metadata file"
           help="sample x metadata columns, separator: tab"/>
    <param name="variableMetadata_in" type="data" format="tabular"
           label="Variable metadata file"
           help="variable x metadata columns, separator: tab"/>

    <!-- Which classes to act on, and whether to keep or drop them -->
    <param name="sampleclassNames" type="text" value=""
           label="Names of sample classes"
           help="comma-separated names of sample classes to filter in or out; defaults to no names"/>
    <param name="inclusive" type="select"
           label="Include named classes"
           help="filter-in - include only the named sample classes; filter-out (the default) - exclude only the named sample classes">
      <option value="TRUE">filter-in</option>
      <option value="FALSE" selected="true">filter-out</option>
    </param>

    <!-- Names of the sample metadata columns holding the class and the sample name -->
    <param name="classnameColumn" type="text" value="class"
           label="Column that names the sample-class"
           help="name of the column in sample metadata that has the values to be tested against the 'classes' input parameter - defaults to 'class'"/>
    <param name="samplenameColumn" type="text" value="sampleMetadata"
           label="Column that names the sample"
           help="name of the column in sample metadata that has the name of the sample - defaults to 'sampleMetadata'"/>
  </inputs>
  <!-- One tabular output per input dataset; each label is the tool name followed by the name of the corresponding input. -->
  <outputs>
    <data name="dataMatrix_out" format="tabular" label="${tool.name}_${dataMatrix_in.name}"/>
    <data name="sampleMetadata_out" format="tabular" label="${tool.name}_${sampleMetadata_in.name}"/>
    <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
  </outputs>

  <tests>
    <!--
      Filter in the samples whose "gender" column is "M".
      All three outputs are verified from a single run of the tool instead of
      repeating the identical job once per output.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="M"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <!-- "TRUE" is the option value whose display text is "filter-in" -->
      <param name="inclusive" value="TRUE"/>
      <!-- Data matrix: checks both sample names and variable names -->
      <output name="dataMatrix_out">
        <assert_contents>
          <!-- sample names that must be absent -->
          <not_has_text text="HU_028" />
          <not_has_text text="HU_051" />
          <not_has_text text="HU_060" />
          <not_has_text text="HU_110" />
          <not_has_text text="HU_149" />
          <not_has_text text="HU_152" />
          <not_has_text text="HU_175" />
          <not_has_text text="HU_178" />
          <not_has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <not_has_text text="HU_208" />
          <!-- sample names that must be present -->
          <has_text     text="HU_017" />
          <has_text     text="HU_034" />
          <has_text     text="HU_078" />
          <has_text     text="HU_091" />
          <has_text     text="HU_093" />
          <has_text     text="HU_099" />
          <has_text     text="HU_130" />
          <has_text     text="HU_134" />
          <has_text     text="HU_138" />
          <!-- variable names: HMDB00822 must be absent, the rest present -->
          <has_text     text="HMDB03193" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01101" />
          <has_text     text="HMDB01101.1" />
          <has_text     text="HMDB10348" />
          <has_text     text="HMDB59717" />
          <has_text     text="HMDB13189" />
          <has_text     text="HMDB00299" />
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00518" />
          <has_text     text="HMDB00715" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB00208" />
          <has_text     text="HMDB04824" />
          <has_text     text="HMDB00512" />
          <has_text     text="HMDB00251" />
        </assert_contents>
      </output>
      <!-- Sample metadata: checks sample names only -->
      <output name="sampleMetadata_out">
        <assert_contents>
          <not_has_text text="HU_028" />
          <not_has_text text="HU_051" />
          <not_has_text text="HU_060" />
          <not_has_text text="HU_110" />
          <not_has_text text="HU_149" />
          <not_has_text text="HU_152" />
          <not_has_text text="HU_175" />
          <not_has_text text="HU_178" />
          <not_has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <not_has_text text="HU_208" />
          <has_text     text="HU_017" />
          <has_text     text="HU_034" />
          <has_text     text="HU_078" />
          <has_text     text="HU_091" />
          <has_text     text="HU_093" />
          <has_text     text="HU_099" />
          <has_text     text="HU_130" />
          <has_text     text="HU_134" />
          <has_text     text="HU_138" />
        </assert_contents>
      </output>
      <!-- Variable metadata: checks variable names only -->
      <output name="variableMetadata_out">
        <assert_contents>
          <has_text     text="HMDB03193" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01101" />
          <has_text     text="HMDB01101.1" />
          <has_text     text="HMDB10348" />
          <has_text     text="HMDB59717" />
          <has_text     text="HMDB13189" />
          <has_text     text="HMDB00299" />
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00518" />
          <has_text     text="HMDB00715" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB00208" />
          <has_text     text="HMDB04824" />
          <has_text     text="HMDB00512" />
          <has_text     text="HMDB00251" />
        </assert_contents>
      </output>
    </test>
  </tests>



	<help>
		<![CDATA[

.. class:: infomark

**Author**	Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)

--------------------------------------------------------------------------

.. class:: infomark

**R package**

The *w4mclassfilter* package is available from the Hegeman lab github repository (https://github.com/HegemanLab/w4mclassfilter/releases).

-----------------------------------------------------------------------------------------------------------------------------------------

.. class:: infomark

**Tool updates**

See the **NEWS** section at the bottom of this page

---------------------------------------------------

==============================================
Filter Workflow4Metabolomics data matrix files
==============================================

-----------
Description
-----------

Filter a set of retention-corrected W4M files (dataMatrix, sampleMetadata, variableMetadata) by sample-class

-----------------
Workflow Position
-----------------

  - Upstream tool category: Preprocessing
  - Downstream tool categories: Normalisation, Statistical Analysis, Quality Control

----------
Motivation
----------

GC-MS1 and LC-MS1 experiments seek to resolve chemicals as features that have distinct chromatographic behavior and (after ionization) mass-to-charge ratio. 
Data for a sample are collected as MS intensities, each of which is associated with a position on a 2D plane with dimensions of m/z ratio and chromatographic retention time.
Ideally, features would be sufficiently reproducible from sample-run to sample-run to identify features that are common among samples and those that differ.
However, the chromatographic retention time for a chemical can vary from one run to another.
In the Workflow4Metabolomics (W4M, [Giacomoni *et al.*, 2014]) "flavor" of Galaxy, the XCMS [Smith *et al.*, 2006] preprocessing tools provide for "retention time correction" to align features among samples, but features may be better aligned if pooled samples and blanks are included.

Multivariate statistical techniques may be used to discover clusters of similar samples, and sometimes it is desirable to apply clustering iteratively to smaller and smaller subsets of samples until observable separation of clusters is no longer significant.
Once feature-alignment has been achieved among samples in GC-MS and LC-MS datasets, however, the presence of pools and blanks may confound identification and separation of clusters.
Multivariate statistical algorithms also may be impacted by missing values or dimensions that have zero variance.

The w4mclassfilter tool provides a way to choose subsets of samples for further analysis.
The tool takes as input the data matrix, sample metadata, and variable metadata Galaxy datasets produced by W4M and produces the same trio of datasets with data only for the selected samples.
The tool uses a "sample-class" column in the sample metadata as the basis for including or eliminating samples for further analysis.
Class-values to be considered are provided by the user as a comma-separated list.
The user also provides an indication whether the list specifies classes to be included in further analysis ("filter-in") or rather to be excluded from it ("filter-out").
Next, missing and negative intensities for features of the remaining samples are imputed to zero.
Finally, samples or features with zero variance are eliminated.


-----------
Input files
-----------

+---------------------------+------------+
| File                      |   Format   |
+===========================+============+
|     Data matrix           |   tabular  |
+---------------------------+------------+
|     Sample metadata       |   tabular  |
+---------------------------+------------+
|     Variable metadata     |   tabular  |
+---------------------------+------------+


----------
Parameters
----------

Data matrix file
	| variable x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the variable and sample metadata, respectively (see below)
	|

Sample metadata file
	| sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
	|

Variable metadata file
	| variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
	|

Names of sample classes (default = no names)
	| comma-separated names of sample classes to include or exclude
	|

Include named classes (default = filter-out)
	| *filter-in* - include only the named sample classes
	| *filter-out* - exclude only the named sample classes
	|


Column that names the sample-class (default = 'class')
	| name of the column in sample metadata that has the values to be tested against the 'Names of sample classes' input parameter
	|

Column that names the sample (default = 'sampleMetadata')
	| name of the column in sample metadata that has the name of the sample
	|


------------
Output files
------------


sampleMetadata
	| (tabular separated values) file identical to the **sampleMetadata** file given as an input argument, except that it lacks rows for samples that have been filtered out (by the sample-class filter or because of zero variance)
	|

variableMetadata
	| (tabular separated values) file identical to the **variableMetadata** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (because of zero variance)
	|

dataMatrix
	| (tabular separated values) file identical to the **dataMatrix** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (because of zero variance) and columns for samples that have been filtered out (by the sample-class filter or because of zero variance)
	|


---------------
Working example
---------------

.. class:: infomark

**Input files**

+-------------------+--------------------------------------------------------------------------------------------------------------+
| Input File        | Download from URL                                                                                            |
+===================+==============================================================================================================+
| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_dataMatrix.tsv       |
+-------------------+--------------------------------------------------------------------------------------------------------------+
| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_sampleMetadata.tsv   |
+-------------------+--------------------------------------------------------------------------------------------------------------+
| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_variableMetadata.tsv |
+-------------------+--------------------------------------------------------------------------------------------------------------+

.. class:: infomark

**Other input parameters**

+------------------------------------+-----------------+
| Input Parameter                    | Value           |
+====================================+=================+
| Names of sample classes            | M               |
+------------------------------------+-----------------+
| Include named classes              | filter-in       |
+------------------------------------+-----------------+
| Column that names the sample-class | gender          |
+------------------------------------+-----------------+
| Column that names the sample       | sampleMetadata  |
+------------------------------------+-----------------+

.. class:: infomark

**Expected outputs**

+-------------------+-----------------------------------------------------------------------------------------------------------------+
| Expected Output   | Download from URL                                                                                               |
+===================+=================================================================================================================+
| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_dataMatrix.tsv       |
+-------------------+-----------------------------------------------------------------------------------------------------------------+
| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_sampleMetadata.tsv   |
+-------------------+-----------------------------------------------------------------------------------------------------------------+
| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_variableMetadata.tsv |
+-------------------+-----------------------------------------------------------------------------------------------------------------+


-----------------------------------------------------------------------------

----
NEWS
----

CHANGES IN VERSION 0.98.1
=========================

NEW FEATURES

First release - Wrap the w4mclassfilter R package that implements filtering of W4M data matrix, variable metadata, and sample metadata by class of sample.

*dataMatrix* *is* modified by the tool, so it *does* appear as an output file
*sampleMetadata* *is* modified by the tool, so it *does* appear as an output file
*variableMetadata* *is* modified by the tool, so it *does* appear as an output file

INTERNAL MODIFICATIONS

none

    ]]>
  </help>
  <citations>
    <!-- NOTE(review): presumably the XCMS reference cited in the help text as Smith et al., 2006; confirm -->
    <citation type="doi">10.1021/ac051437y</citation>
    <!-- NOTE(review): presumably the Workflow4Metabolomics reference cited in the help text as Giacomoni et al., 2014; confirm -->
    <citation type="doi">10.1093/bioinformatics/btu813</citation>
  </citations>
  <!--
     vim:noet:sw=4:ts=4
--> </tool>