Mercurial > repos > proteore > filter_keywords_values

diff filter_kw_val.xml @ 7:6f32c1e12572 draft default tip
planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
author: proteore
date: Fri, 01 Jun 2018 11:10:47 -0400
parents: 1e9911190142
--- a/filter_kw_val.xml	Fri Apr 20 09:07:23 2018 -0400
+++ b/filter_kw_val.xml	Fri Jun 01 11:10:47 2018 -0400
@@ -9,10 +9,11 @@
         python $__tool_directory__/filter_kw_val.py
         -i "$input1,$header"
         -o "$output1"
-        --trash_file "$trash_file"
+        --filtered_file "$filtered_file"
+        --operator "$operator"
 
         ## Keywords
-        #for $i, $key in enumerate($keyword)
+        #for $key in $keyword
             #if $key.k.kw != "None"
                 #if $key.k.kw == "text"
                     --kw "$key.k.txt" "$key.ncol" "$key.match"
@@ -22,8 +23,8 @@
             #end if
         #end for
 
-        ## Number of proteins
-        #for $i, $val in enumerate($value)
+        ## value to filter
+        #for $val in $value
             #if $val.v.val != "None"
                 --value
                 #if $val.v.val == "Equal"
@@ -34,16 +35,35 @@
                     $val.v.equal_higher "$val.ncol" ">="
                 #else if $val.v.val == "Lower"
                     $val.v.lower "$val.ncol" "<"
-                #else
+                #else if $val.v.val == "Equal or lower"
                     $val.v.equal_lower "$val.ncol" "<="
+                #else 
+                    $val.v.different "$val.ncol" "!="
                 #end if
             #end if
         #end for
 
+        ##range of values to keep
+        #for $vr in $values_range
+            #if vr 
+                --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive
+            #end if
+        #end for
+
+        #if $sort_column != ""
+            --sort_col "$sort_column,$reversed_sort"
+        #end if
+
     ]]></command>
     <inputs>
         <param type="data" name="input1" format="txt,tabular" label="Input file" help="Input file is a tab-delimited file containing proteomics identification and/or quantitative results" />
         <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
+        <param name="operator" type="select" label="Please select your operator for your filters" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
+            <option value="OR" selected="True">OR</option>
+            <option value="AND">AND</option>
+        </param>
+        <param name="sort_column" type="text" value="" label="If you want to sort the result files by values from a column, please enter a column number" help="For example : fill in 'c1' if you want to sort your result file by the column 1 values." />
+        <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>        
         <repeat name="keyword" title="Filter by keywords" >
             <param name="ncol" type="text" value="c1" label="Please specify the column number of the input file on which you want to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
             <param type="boolean" name="match" truevalue="True" label="Would you like to search for exact match?" help='Choosing "Yes" will only filter out exact match (i.e. case sensitive), see below for more details' />
@@ -71,7 +91,6 @@
                 </when>
             </conditional>
         </repeat>
-
         <repeat name="value" title="Filter by value" >
             <param name="ncol" type="text" value="c1" label="Please specify the column number of the input file on which you want to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
             <conditional name="v" >
@@ -82,6 +101,7 @@
                     <option value="Equal or higher">&gt;=</option>
                     <option value="Lower">&lt;</option>
                     <option value="Equal or lower">&lt;=</option>
+                    <option value="Different">!=</option>
                 </param>
                 <when value="None" >
                 </when>
@@ -100,18 +120,29 @@
                 <when value="Equal or lower" >
                     <param type="float" name="equal_lower" value="" label="Value" />
                 </when>
+                <when value="Different">
+                    <param type="float" name="different" value="" label="Value"/>
+                </when>
             </conditional>
         </repeat>
-
+        <repeat name="values_range" title="Filter by range of values">
+            <param name="ncol" type="text" value="c1" label="Please specify the column number of the input file on which you want to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
+            <param name="bottom_value" type="float" value="" label="Please enter the bottom value" />
+            <param name="top_value" type="float" value="" label="Please enter the top value" />
+            <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" />
+        </repeat>
     </inputs>
     <outputs>
         <data name="output1" format="tabular" label="${tool.name} on ${input1.name}" />
-        <data name="trash_file" format="tabular" label="${tool.name} on ${input1.name} - Filtered lines" />
+        <data name="filtered_file" format="tabular" label="${tool.name} on ${input1.name} - Filtered lines" />
     </outputs>
     <tests>
         <test>
             <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
             <param name="header" value="true" />
+            <param name="operator" value="OR"/>
+            <param name="sort_column" value="c1"/>
+            <param name="reversed_sort" value="false"/>
             <repeat name="keyword">
                 <param name="ncol" value="c1" />
                 <param name="match" value="True" />
@@ -120,16 +151,21 @@
                     <param name="txt" value="P04264;P35908;P13645;Q5D862;Q5T749;Q8IW75;P81605;P22531;P59666;P78386" />
                 </conditional>
             </repeat>
-            <output name="output1" file="FKW_Lacombe_et_al_2017_OK.txt" />
-            <output name="trash_file" file="Trash_FKW_Lacombe_et_al_2017_OK.txt" />
+            <repeat name="value">
+                <param name="ncol" value="c3"/>
+                <conditional name="v">
+                    <param name="val" value="Higher"/>
+                    <param name="higher" value="20" />
+                </conditional>
+            </repeat>
+            <output name="output1" file="output.csv" />
+            <output name="filtered_file" file="filtered_output.csv" />
         </test>
     </tests>
     <help><![CDATA[
-This tool allows to filter out data according to your specific needs (e.g. contaminants, non-significant values or related to a particular annotation) from a proteomics results file (e.g. MaxQuant or Proline output).
+This tool allows you to remove unneeded data (e.g. contaminants, non-significant values) from a proteomics results file (e.g. MaxQuant or Proline output).
 
-**For each row, if there are more than one protein IDs/protein names/gene names, only the first one will be considered in the output**
-
-**Filter the file by keywords**
+**Filter by keyword(s)**
 
 Several options can be used. For each option, you can fill in the field or upload a file which contains the keywords.
 
@@ -143,25 +179,69 @@
 
 ALDOA_RABBIT
 
-**The line that contains these keywords will be filtered from input file and provided in a separate file.**
+**Lines that contain these keywords will be removed from the input file.**
 
 **Keywords search can be applied by performing either exact match or partial one by using the following option**
 
-- If you choose **Yes**, only the fields that contains exactly the same content will be filtered.
+- If you choose **Yes**, only the fields that contain exactly the same content will be removed.
 
-- If you choose **No**, all the fields containing the keyword will be filtered.
+- If you choose **No**, all the fields containing the keyword will be removed.
 
 For example:
 
-**Yes** option (exact match) selected using the keyword "kinase": only lines which contain exactly "kinase" is filtered (and not "Kinase").
+**Yes** option (exact match) selected using the keyword "kinase": only lines which contain exactly "kinase" are removed.
 
 **No** option (partial match) for "kinase": not only lines which contain "kinase" but also lines with "alpha-kinase" (and so  on) are removed.
 
-**Filter the file by values**
+-----
+
+**Filter by values**
+
+You can filter your data by a column of numerical values.
+Enter the column to be used and select one operator from the list:
+
+- "="
+- "!="
+- "<"
+- "<="
+- ">"
+- ">="
+
+Then enter the value to filter and specify the column to apply that option.
+If a row contains a value that corresponds to your settings, it will be filtered out.
+
+-----
+
+**Filter by a range of values**
+
+You can also set a range of values to filter your file.
+In contrast to the value filter, rows with values inside the defined range are kept.
 
-You can choose to use one or more options (e.g. to filter out peptides of low intensity value, by q-value, etc.).
+Rows with values outside of the defined range will be filtered.
+
+-----
+
+**AND/OR operator**
+
+Since you can add as many filters as you want, you can choose how filters apply on your data.
+
+AND or OR operator option works on all filters :
+
+- OR : only one filter must be satisfied to remove a row
+- AND : all filters must be satisfied to remove a row
 
-* For each option, you can choose between "=", ">", ">=", "<" and "<=", then enter the value to filter and specify the column to apply that option.
+-----
+
+**Sort the results files**
+
+You can sort the result file if you wish, it can help you to check results. 
+
+In order to do so : enter the column to be used, all columns will be sorted according to the one filled in.
+
+Rows stay intact; only their order changes, as when sorting in Excel.
+You can also choose ascending or descending order; by default ascending order is set.
+
+-----
 
 **Output**
 
@@ -169,7 +249,7 @@
 
 * A text file containing the resulting filtered input file.
 
-* A text file containing the rows that have been filtered from the input file.
+* A text file containing the rows removed from the input file.
 
 -----
 
@@ -177,7 +257,7 @@
 
 **Authors**
 
-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
author	proteore
date	Fri, 01 Jun 2018 11:10:47 -0400
parents	1e9911190142
children