diff filter_kw_val.xml @ 5:33ca9ba2495a draft

planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author proteore
date Tue, 05 Mar 2019 07:37:10 -0500
parents 2080e2a4f209
children b4641c0f8a82
line wrap: on
line diff
--- a/filter_kw_val.xml	Tue Feb 05 08:22:47 2019 -0500
+++ b/filter_kw_val.xml	Tue Mar 05 07:37:10 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.02.05">
+<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.05">
     <description></description>
     <requirements>
     </requirements>
@@ -8,8 +8,9 @@
     <command><![CDATA[
         python $__tool_directory__/filter_kw_val.py
         -i "$input1,$header"
-        -o "$output1"
-        --filtered_file "$filtered_file"
+        -o "$kept_lines"
+        --discarded_lines "$discarded_lines"
+        --operation "$operation"
         --operator "$operator"
 
         ## Keywords
@@ -25,21 +26,8 @@
 
         ## value to filter
         #for $val in $value
-            #if $val.v.val != "None"
-                --value
-                #if $val.v.val == "Equal"
-                    $val.v.equal "$val.ncol" "="
-                #else if $val.v.val == "Higher"
-                    $val.v.higher "$val.ncol" ">"
-                #else if $val.v.val == "Equal or higher"
-                    $val.v.equal_higher "$val.ncol" ">="
-                #else if $val.v.val == "Lower"
-                    $val.v.lower "$val.ncol" "<"
-                #else if $val.v.val == "Equal or lower"
-                    $val.v.equal_lower "$val.ncol" "<="
-                #else 
-                    $val.v.different "$val.ncol" "!="
-                #end if
+            #if $val.value != "None"
+                --value "$val.value" "$val.ncol" "$val.operator"
             #end if
         #end for
 
@@ -50,21 +38,27 @@
             #end if
         #end for
 
-        #if $sort_column != ""
-            --sort_col "$sort_column,$reversed_sort"
+        #if $sort.sort_bool == "true"
+            --sort_col "$sort.sort_column,$sort.reversed_sort"
         #end if
 
     ]]></command>
     <inputs>
         <param type="data" name="input1" format="txt,tabular" label="Input file" />
         <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+        <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?">
+            <option value="keep">Keep</option>
+            <option value="discard">Discard</option>
+        </param>
         <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
             <option value="OR" selected="True">OR</option>
             <option value="AND">AND</option>
         </param>
         
         <repeat name="keyword" title="Filter by keywords" >
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
             <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' />
             <conditional name="k" >
                 <param name="kw" type="select" label="Enter keywords" >
@@ -72,7 +66,7 @@
                     <option value="file">File containing keywords</option>
                 </param>
                 <when value="text" >
-                    <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
+                    <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
                         <sanitizer>
                         <valid initial="string.printable">
                             <remove value="&apos;"/>
@@ -91,56 +85,53 @@
             </conditional>
         </repeat>
         <repeat name="value" title="Filter by numerical value" >
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
-            <conditional name="v" >
-                <param name="val" type="select" label="Select operator" >
-                    <option value="Equal">=</option>
-                    <option value="Higher">&gt;</option>
-                    <option value="Equal or higher">&gt;=</option>
-                    <option value="Lower">&lt;</option>
-                    <option value="Equal or lower">&lt;=</option>
-                    <option value="Different">!=</option>
-                </param>
-                <when value="Equal" >
-                    <param name="equal" type="float" value="" label="Value" />
-                </when>
-                <when value="Higher" >
-                    <param type="float" name="higher" value="" label="Value" />
-                </when>
-                <when value="Equal or higher" >
-                    <param type="float" name="equal_higher" value="" label="Value" />
-                </when>
-                <when value="Lower" >
-                    <param type="float" name="lower" value="" label="Value" />
-                </when>
-                <when value="Equal or lower" >
-                    <param type="float" name="equal_lower" value="" label="Value" />
-                </when>
-                <when value="Different">
-                    <param type="float" name="different" value="" label="Value"/>
-                </when>
-            </conditional>
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
+            <param name="operator" type="select" label="Select operator" >
+                <option value="Equal">=</option>
+                <option value="Higher">&gt;</option>
+                <option value="Equal-or-higher">&gt;=</option>
+                <option value="Lower">&lt;</option>
+                <option value="Equal-or-lower">&lt;=</option>
+                <option value="Different">!=</option>
+            </param>
+            <param name="value" type="float" value="" label="Value"></param>
         </repeat>
         <repeat name="values_range" title="Filter by range of numerical values">
-            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
+            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
+                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+            </param>
             <param name="bottom_value" type="float" value="" label="Enter the bottom value" />
             <param name="top_value" type="float" value="" label="Enter the top value" />
             <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" />
         </repeat>
-        <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" />
-        <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>    
+        <conditional name="sort"> <!-- sort options only apply when sort_bool is "true" -->
+            <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" />
+            <when value="true">
+                <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values">
+                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^[c]{0,1}[0-9]+$</validator>
+                </param>
+                <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
+            </when>
+            <when value="false"/>
+        </conditional>
     </inputs>    
     <outputs>
-        <data name="output1" format="tsv" label="${tool.name} on ${input1.name}" />
-        <data name="filtered_file" format="tsv" label="${tool.name} on ${input1.name} - Filtered lines" />
+        <data name="kept_lines" format="tsv" label="Filtered_${input1.name}" />
+        <data name="discarded_lines" format="tsv" label="Filtered_${input1.name} - discarded_lines" />
     </outputs>
     <tests>
         <test>
             <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
             <param name="header" value="true" />
             <param name="operator" value="OR"/>
+            <param name="operation" value="discard"/>
             <param name="sort_column" value="c1"/>
-            <param name="reversed_sort" value="false"/>
+            <conditional name="sort">
+                <param name="sort_bool" value="false"/>
+                <param name="reversed_sort" value="false"/>
+            </conditional>
             <repeat name="keyword">
                 <param name="ncol" value="c1" />
                 <param name="match" value="True" />
@@ -151,19 +142,17 @@
             </repeat>
             <repeat name="value">
                 <param name="ncol" value="c3"/>
-                <conditional name="v">
-                    <param name="val" value="Higher"/>
-                    <param name="higher" value="20" />
-                </conditional>
+                <param name="operator" value="Higher"/>
+                <param name="value" value="20" />
             </repeat>
-            <output name="output1" file="output.csv" />
-            <output name="filtered_file" file="filtered_output.csv" />
+            <output name="kept_lines" file="output.tsv" />
+            <output name="discarded_lines" file="discarded_lines.tsv" />
         </test>
     </tests>
     <help><![CDATA[
 **Description**
 
-This tool allows to filter out data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
+This tool allows you to keep or discard rows from your dataset according to different filters such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
 A boolean operator "OR/AND" allows to combine different type of filters making this tool very powerful.
 
 -----
@@ -176,12 +165,21 @@
 
 **Parameters**
 
-**AND/OR operator**
+**Operation**
+
+- **Keep**: only keep lines with keyword(s) and/or value(s) concerned by defined filter(s) 
+- **Discard**: only keep lines with keyword(s) and/or value(s) NOT concerned by defined filter(s)
+
+.. class:: infomark
 
-As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators: 
+Two output files are created, one with kept lines and the other one with discarded lines. 
+
+**Select an operator to combine your filters (if more than one)**
 
-- OR: only one filter must be satisfied to remove one row
-- AND: all filters must be satisfied to remove one row
+Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators: 
+
+- **OR**: at least one of the filters must be satisfied to keep/discard a row
+- **AND**: all filters must be satisfied to keep/discard one row
 
 -----
 
@@ -205,7 +203,7 @@
 
 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option:
 
-- If you choose **Yes**, only the fields that contains exactly the same content will be removed.
+- If you choose **Yes**, only the fields that contain exactly the same content will be removed (i.e. using the "discard" mode).
 
 - If you choose **No**, all the fields containing the keyword will be removed.
 
@@ -231,18 +229,17 @@
 - >= (greater than or equal to)
 
 Then enter the numerical threshold to apply by filling the "Value" box. 
-If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that correspond to your settings will be filtered out.
+If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that corresponds to your settings will be kept or discarded (depending on the operation parameter).
 
 -----
 
 **Filter by a range of values**: You can also set a range of values to filter your file.
-Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be filtered out.
+Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter).
 
 -----
 
-**Sort results files**
-
-You can sort your results by column in ascending (default value) or descending by entering the column number on which to sort the data.
+**Sort by column ?**
+Clicking the "Yes" button lets you fill in "Sort result files by:" with a column number. Sorting can be done in ascending (default value) or descending order on the column you entered.
 
 -----
 
@@ -250,9 +247,9 @@
 
 The tool returns two output files.
 
-* A text file containing the results that pass your filters
+* A text file containing the results that satisfy your filters (i.e. "keep" mode).
 
-* A text file containing the rows removed from the input file (i.e. containing data taht do not pass your filter(s).
+* A text file containing the rows removed from the input file (i.e. "discard" mode).
 
 -----
 
@@ -260,7 +257,7 @@
 
 **Authors**
 
-T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR