diff w4mclassfilter.xml @ 12:38f509903a0b draft

"planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit b9712e554d16ed26f6c6d0c2e8cd74552b49f694"
author eschen42
date Tue, 01 Oct 2019 16:57:58 -0400
parents 9f5c0e23c205
children c18040b6e8b9
line wrap: on
line diff
--- a/w4mclassfilter.xml	Mon Sep 03 22:34:35 2018 -0400
+++ b/w4mclassfilter.xml	Tue Oct 01 16:57:58 2019 -0400
@@ -1,27 +1,28 @@
-<tool id="w4mclassfilter" name="W4m Data Subset" version="0.98.11">
+<tool id="w4mclassfilter" name="W4m Data Subset" version="0.98.13">
     <description>Filter W4m data by values or metadata</description>
     <!-- Here is the hyphenation standard that I *try* to apply consistently in my documentation: http://www.sandranoonan.com/dont-let-hyphenation-drive-crazy/ -->
     <requirements>
-        <!-- <requirement type="package" version="6.2">readline</requirement> -->
-        <requirement type="package" version="3.4.1">r-base</requirement>
-        <requirement type="package" version="1.1_4">r-batch</requirement>
-        <requirement type="package" version="0.98.8">w4mclassfilter</requirement>
+        <requirement type="package" version="3.6.1">r-base</requirement>
+        <requirement type="package" version="1.1_5">r-batch</requirement>
+        <requirement type="package" version="0.98.13">w4mclassfilter</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
+    unset R_HOME;
     Rscript $__tool_directory__/w4mclassfilter_wrapper.R
-    dataMatrix_in '$dataMatrix_in'
-    sampleMetadata_in '$sampleMetadata_in'
-    variableMetadata_in '$variableMetadata_in'
-    sampleclassNames '$sampleclassNames'
-    inclusive '$inclusive'
-    wildcards '$wildcards'
-    classnameColumn '$classnameColumn'
-    samplenameColumn 'sampleMetadata'
+    dataMatrix_in         '$dataMatrix_in'
+    sampleMetadata_in     '$sampleMetadata_in'
+    variableMetadata_in   '$variableMetadata_in'
+    sampleclassNames      '$sampleclassNames'
+    inclusive             '$inclusive'
+    wildcards             '$wildcards'
+    classnameColumn       '$classnameColumn'
+    samplenameColumn      'sampleMetadata'
     variable_range_filter '$variableRangeFilter'
-    transformation '$transformation'
-    dataMatrix_out '$dataMatrix_out'
-    sampleMetadata_out '$sampleMetadata_out'
-    variableMetadata_out '$variableMetadata_out'
+    transformation        '$transformation'
+    imputation            '$imputation'
+    dataMatrix_out        '$dataMatrix_out'
+    sampleMetadata_out    '$sampleMetadata_out'
+    variableMetadata_out  '$variableMetadata_out'
     ]]></command>
     <inputs>
         <param name="dataMatrix_in" format="tabular" label="Data matrix file" type="data"
@@ -95,6 +96,12 @@
             <option value="log2">log2</option>
             <option value="log10">log10</option>
         </param>
+        <param name="imputation" label="Imputation of missing values" type="select"
+            help="'zero' (the default) - replace missing values with zero; 'center' - replace missing values with feature-median; 'none' - perform no imputation">
+            <option value="zero" selected="true">zero</option>
+            <option value="center">center</option>
+            <option value="none">none</option>
+        </param>
     </inputs>
     <outputs>
         <data name="dataMatrix_out" format="tabular" label="${dataMatrix_in.name}.subset" ></data>
@@ -110,16 +117,27 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="M"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
-        <param name="variableRangeFilter" value="FEATMAX:6.30103:,mz:200:,rt::800"/>
-        <param name="transformation" value="log10"/>
+        <param name="inclusive" value="TRUE"/>
+        <param name="variableRangeFilter" value="FEATMAX:2e6:,mz:200:,rt::800"/>
+        <param name="transformation" value="none"/>
         <output name="dataMatrix_out">
           <assert_contents>
-            <has_text text="5.87336711011293" />
+            <has_text text="747080" />
+            <not_has_text text="13420742" />
+            <not_has_text text="47259" />
           </assert_contents>
         </output>
         <output name="sampleMetadata_out">
           <assert_contents>
+            <has_text text="HU_017" />
+            <has_text text="HU_034" />
+            <has_text text="HU_078" />
+            <has_text text="HU_091" />
+            <has_text text="HU_093" />
+            <has_text text="HU_099" />
+            <has_text text="HU_130" />
+            <has_text text="HU_134" />
+            <has_text text="HU_138" />
             <not_has_text text="HU_028" />
             <not_has_text text="HU_051" />
             <not_has_text text="HU_060" />
@@ -131,94 +149,87 @@
             <not_has_text text="HU_185" />
             <not_has_text text="HU_204" />
             <not_has_text text="HU_208" />
-            <has_text text="HU_017" />
-            <has_text text="HU_034" />
-            <has_text text="HU_078" />
-            <has_text text="HU_091" />
-            <has_text text="HU_093" />
-            <has_text text="HU_099" />
-            <has_text text="HU_130" />
-            <has_text text="HU_134" />
-            <has_text text="HU_138" />
           </assert_contents>
         </output>
         <output name="variableMetadata_out">
           <assert_contents>
+            <has_text     text="HMDB00208" />
+            <has_text     text="HMDB01032" />
+            <has_text     text="HMDB01101.1" />
+            <has_text     text="HMDB13189" />
             <not_has_text text="HMDB00191" />
-            <has_text     text="HMDB00208" />
             <not_has_text text="HMDB00251" />
             <not_has_text text="HMDB00299" />
             <not_has_text text="HMDB00512" />
             <not_has_text text="HMDB00518" />
             <not_has_text text="HMDB00715" />
             <not_has_text text="HMDB00822" />
-            <has_text     text="HMDB01032" />
-            <has_text     text="HMDB01101.1" />
             <not_has_text text="HMDB03193" />
             <not_has_text text="HMDB04824" />
             <not_has_text text="HMDB10348" />
-            <has_text     text="HMDB13189" />
             <not_has_text text="HMDB59717" />
           </assert_contents>
         </output>
       </test>
       <!-- test 2 -->
-      <test>
+	  <test>
         <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
         <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
         <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
-        <param name="classnameColumn" value="sampleMetadata"/>
         <!-- test that hyphens in regular expressions work -->
         <param name="sampleclassNames" value="HU_[0-9][0-9][0-9]"/>
+        <param name="inclusive" value="TRUE"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
-        <param name="variableRangeFilter" value="FEATMAX:20.93157:,mz:200:,rt::800"/>
-        <param name="transformation" value="log2"/>
-        <output name="dataMatrix_out">
+        <param name="classnameColumn" value="sampleMetadata"/>
+		<!-- test that variableRangeFilter works with transformation -->
+		<param name="variableRangeFilter" value="FEATMAX:6.30103:,mz:200:,rt::800"/>
+        <param name="transformation" value="log10"/>
+        <param name="imputation" value="zero"/>
+        <output name="dataMatrix_out" md5="5644d2ea01d072ee1d0c40e29e9d0089">
           <assert_contents>
-            <has_text text="19.5109032146715" />
+			<has_text text="5.8733671" />
           </assert_contents>
         </output>
         <output name="sampleMetadata_out">
           <assert_contents>
+            <has_text text="HU_017" />
             <has_text text="HU_028" />
+            <has_text text="HU_034" />
             <has_text text="HU_051" />
             <has_text text="HU_060" />
+            <has_text text="HU_078" />
+            <has_text text="HU_091" />
+            <has_text text="HU_093" />
+            <has_text text="HU_099" />
             <has_text text="HU_110" />
+            <has_text text="HU_130" />
+            <has_text text="HU_134" />
+            <has_text text="HU_138" />
             <has_text text="HU_149" />
             <has_text text="HU_152" />
             <has_text text="HU_175" />
             <has_text text="HU_178" />
             <has_text text="HU_185" />
+            <has_text text="HU_208" />
             <not_has_text text="HU_204" />
-            <has_text text="HU_208" />
-            <has_text text="HU_017" />
-            <has_text text="HU_034" />
-            <has_text text="HU_078" />
-            <has_text text="HU_091" />
-            <has_text text="HU_093" />
-            <has_text text="HU_099" />
-            <has_text text="HU_130" />
-            <has_text text="HU_134" />
-            <has_text text="HU_138" />
           </assert_contents>
         </output>
         <output name="variableMetadata_out">
           <assert_contents>
             <has_text     text="HMDB00191" />
             <has_text     text="HMDB00208" />
+            <has_text     text="HMDB01032" />
+            <has_text     text="HMDB01101.1" />
+            <has_text     text="HMDB13189" />
             <not_has_text text="HMDB00251" />
             <not_has_text text="HMDB00299" />
             <not_has_text text="HMDB00512" />
             <not_has_text text="HMDB00518" />
             <not_has_text text="HMDB00715" />
             <not_has_text text="HMDB00822" />
-            <has_text     text="HMDB01032" />
-            <has_text     text="HMDB01101.1" />
             <not_has_text text="HMDB03193" />
             <not_has_text text="HMDB04824" />
             <not_has_text text="HMDB10348" />
-            <has_text     text="HMDB13189" />
             <not_has_text text="HMDB59717" />
           </assert_contents>
         </output>
@@ -230,7 +241,7 @@
         <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="M"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <param name="transformation" value="none"/>
         <output name="dataMatrix_out">
           <assert_contents>
@@ -281,7 +292,13 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="*"/>
         <param name="wildcards" value="TRUE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
+        <param name="imputation" value="zero"/>
+        <output name="dataMatrix_out" md5="b2eac4946d3803a07606286b50451af4">
+          <assert_contents>
+            <not_has_text text="NA" />
+          </assert_contents>
+        </output>
         <output name="sampleMetadata_out">
           <assert_contents>
             <not_has_text text="HU_204" />
@@ -315,7 +332,7 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="M"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <output name="sampleMetadata_out">
           <assert_contents>
             <not_has_text text="HU_028" />
@@ -349,7 +366,7 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="M"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <output name="variableMetadata_out">
           <assert_contents>
             <has_text     text="HMDB03193" />
@@ -379,7 +396,7 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="M"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <output name="variableMetadata_out">
           <assert_contents>
             <has_text     text="HMDB03193" />
@@ -409,7 +426,7 @@
         <param name="classnameColumn" value="gender"/>
         <param name="sampleclassNames" value="[Mm],[fF]"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <output name="sampleMetadata_out">
           <assert_contents>
             <has_text text="HU_028" />
@@ -443,7 +460,7 @@
         <param name="classnameColumn" value=""/>
         <param name="sampleclassNames" value="M"/>
         <param name="wildcards" value="FALSE"/>
-        <param name="inclusive" value="filter-in"/>
+        <param name="inclusive" value="TRUE"/>
         <output name="sampleMetadata_out">
           <assert_contents>
             <has_text text="HU_028" />
@@ -469,6 +486,54 @@
           </assert_contents>
         </output>
       </test>
+      <!-- test 10 - extends test4 with no imputation rather than zero imputation -->
+      <test>
+        <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
+        <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
+        <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
+        <param name="classnameColumn" value="gender"/>
+        <param name="sampleclassNames" value="*"/>
+        <param name="wildcards" value="TRUE"/>
+        <param name="inclusive" value="TRUE"/>
+        <param name="imputation" value="none"/>
+        <output name="dataMatrix_out" md5="cc9ab8bdb70b68b43b19b7327d285166">
+          <assert_contents>
+            <not_has_text text="HU_204" />
+            <has_text text="NA" />
+            <has_text text="HU_028" />
+          </assert_contents>
+        </output>
+        <output name="sampleMetadata_out">
+          <assert_contents>
+            <not_has_text text="HU_204" />
+            <has_text text="HU_028" />
+          </assert_contents>
+        </output>
+      </test>
+      <!-- test 11 - extends test4 with center imputation rather than zero imputation -->
+      <test>
+        <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
+        <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
+        <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
+        <param name="classnameColumn" value="gender"/>
+        <param name="sampleclassNames" value="*"/>
+        <param name="wildcards" value="TRUE"/>
+        <param name="inclusive" value="TRUE"/>
+        <param name="imputation" value="center"/>
+        <output name="dataMatrix_out" md5="75a4802bb8887709e4d4dec8c2c3d3cf">
+          <assert_contents>
+            <not_has_text text="HU_204" />
+            <not_has_text text="NA" />
+            <has_text text="HU_028" />
+          </assert_contents>
+        </output>
+        <output name="sampleMetadata_out">
+          <assert_contents>
+            <not_has_text text="HU_204" />
+            <has_text text="HU_028" />
+          </assert_contents>
+        </output>
+      </test>
     </tests>
     <help><![CDATA[
 
@@ -531,13 +596,13 @@
 
 This tool also performs several operations to address several data issues that may impede downstream statistical analysis:
 
-- Missing values in dataMatrix are imputed to zero.
-- The values in the dataMatrix may be log-transformed if desired.
 - Samples that are missing from either sampleMetadata or dataMatrix are eliminated.
 - Features that are missing from either variableMetadata or dataMatrix are eliminated.
 - Features and samples that have zero variance are eliminated.
 - Samples and features are sorted alphabetically in rows and columns of dataMatrix and in rows of variableMetadata and sampleMetadata.
 - The names of the first columns of variableMetadata and sampleMetadata are set respectively to "variableMetadata" and "sampleMetadata".
+- If desired, the values in the dataMatrix may be log-transformed.
+- If desired, each missing value in dataMatrix is replaced with zero or the median value observed for the corresponding feature.
 
 This tool may be applied several times sequentially, which may be useful for:
 
@@ -607,10 +672,15 @@
 	|
 
 Data-transformation (default = '``none``')
-	| '``none``' - do not transform data matrix values
-	| '``log2``' - take the log base 2 of the values in the data matrix
-	| '``log10``' - take the log base 10 of the values in the data matrix
-	| In both cases, negative and missing values are imputed to zero.
+	| '``none``' - Do not transform data matrix values.
+	| '``log2``' - Take the log base 2 of the values in the data matrix.
+	| '``log10``' - Take the log base 10 of the values in the data matrix.
+	|
+
+Data-imputation (default = '``zero``')
+	| '``none``' - Do not impute missing values (negative values are still replaced with zero).
+	| '``zero``' - Negative and missing values are imputed to zero.
+	| '``center``' - For each feature, missing values are imputed to the median of the observed values; negative values are replaced with zero.
 	|
 
 
@@ -708,14 +778,22 @@
 
   - Note, however, that when the "maximum" is greater than the "minimum" for the FEATMAX range specification, then the specification is ignored.
 
+----------------------------------
+Data transformation and imputation
+----------------------------------
+
+Data may optionally be log2- or log10-transformed.
+
+Negative intensity values are always substituted with zeros.
+
+Missing intensity data values may optionally be imputed.  Missing values may be substituted with zeros (as may be appropriate for univariate analysis) or with the median for the feature (as may be appropriate for multivariate analysis).  (Note that the median feature-intensity is computed for the samples *before* variable-range filters are applied.)
+
 -----------------------------------------------------------------------------
 
 ----------------
 WORKING EXAMPLES
 ----------------
 
-.. class:: infomark
-
 -----------
 Input Files
 -----------
@@ -730,8 +808,6 @@
 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_variableMetadata.tsv          |
 +------------------------------------------------------------------------------------------------------------------------------------------------------+
 
-.. class:: infomark
-
 -------------------------------
 Running Without Range-Filtering
 -------------------------------
@@ -740,19 +816,23 @@
 
 **Input parameters**
 
-+------------------------------------+-------------------------------+
-| Input Parameter                    | Value                         |
-+====================================+===============================+
-| Names of sample-classes            | M                             |
-+------------------------------------+-------------------------------+
-| Include named classes              | filter-in                     |
-+------------------------------------+-------------------------------+
-| Column that names the sample-class | gender                        |
-+------------------------------------+-------------------------------+
-| Variable range-filters             | (Leave this field empty.)     |
-+------------------------------------+-------------------------------+
-| Data transforamtion                | none                          |
-+------------------------------------+-------------------------------+
++---------------------------------------------+-------------------------------+
+| Input Parameter                             | Value                         |
++=============================================+===============================+
+| Column that names the sample-class          | gender                        |
++---------------------------------------------+-------------------------------+
+| Names of sample-classes                     | M                             |
++---------------------------------------------+-------------------------------+
+| Use 'wild-cards' or 'regular expressions'   | wild-cards                    |
++---------------------------------------------+-------------------------------+
+| Exclude/include named classes               | filter-in                     |
++---------------------------------------------+-------------------------------+
+| Variable range-filters                      | (Leave this field empty.)     |
++---------------------------------------------+-------------------------------+
+| Data transformation                         | none                          |
++---------------------------------------------+-------------------------------+
+| Missing-value imputation                    | center                        |
++---------------------------------------------+-------------------------------+
 
 **Expected outputs**
 
@@ -766,8 +846,6 @@
 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_variableMetadata.tsv  |
 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
 
-.. class:: infomark
-
 ----------------------------
 Running With Range-Filtering
 ----------------------------
@@ -777,19 +855,23 @@
 
 **Input parameters**
 
-+------------------------------------+------------------------------------+
-| Input Parameter                    | Value                              |
-+====================================+====================================+
-| Names of sample-classes            | (Leave this field empty.)          |
-+------------------------------------+------------------------------------+
-| Include named classes              | filter-out                         |
-+------------------------------------+------------------------------------+
-| Column that names the sample-class | gender                             |
-+------------------------------------+------------------------------------+
-| Variable range-filters             | FEATMAX:20.93157:,mz:200:,rt::800  |
-+------------------------------------+------------------------------------+
-| Data transforamtion                | log2                               |
-+------------------------------------+------------------------------------+
++---------------------------------------------+-----------------------------------+
+| Input Parameter                             | Value                             |
++=============================================+===================================+
+| Column that names the sample-class          | sampleMetadata                    |
++---------------------------------------------+-----------------------------------+
+| Names of sample-classes                     | HU_13[48]                         |
++---------------------------------------------+-----------------------------------+
+| Use 'wild-cards' or 'regular expressions'   | regular-expressions               |
++---------------------------------------------+-----------------------------------+
+| Exclude/include named classes               | filter-out                        |
++---------------------------------------------+-----------------------------------+
+| Variable range-filters                      | FEATMAX:20.93157:,mz:200:,rt::800 |
++---------------------------------------------+-----------------------------------+
+| Data transformation                         | log2                              |
++---------------------------------------------+-----------------------------------+
+| Missing-value imputation                    | zero                              |
++---------------------------------------------+-----------------------------------+
 
 **Expected outputs**