view filter_kw_val.xml @ 3:68cee865018e draft

planemo upload commit ce1b8c12b00aec52eb0f5f9c5a9e30ad7926f970-dirty
author proteore
date Wed, 16 Jan 2019 08:30:53 -0500
parents 52a7afd01c6d
children 2080e2a4f209
line wrap: on
line source

<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.01.16">
    <!-- No short description is displayed next to the tool name. -->
    <description/>
    <!-- No packaged dependencies are declared for this tool. -->
    <requirements/>
    <!-- Exit codes of 1 and above are matched by this rule (no explicit level is set). -->
    <stdio>
        <exit_code range="1:"/>
    </stdio>
    <command><![CDATA[
        ## Builds the filter_kw_val.py command line from the tool form.
        ##
        ## Every substituted value is wrapped in SINGLE quotes. The sanitizer on
        ## the free-text keyword field only neutralises single quotes, so a
        ## double-quoted argument would let characters such as double quotes,
        ## dollar signs or backticks reach the shell unescaped.
        python '$__tool_directory__/filter_kw_val.py'
        -i '$input1,$header'
        -o '$output1'
        --filtered_file '$filtered_file'
        --operator '$operator'

        ## Keyword filters: one --kw (pasted text) or --kw_file (dataset) per repeat.
        ## The select only offers "text" and "file", so no other value is tested.
        #for $key in $keyword
            #if $key.k.kw == "text"
                --kw '$key.k.txt' '$key.ncol' '$key.match'
            #else if $key.k.kw == "file"
                --kw_file '$key.k.file' '$key.k.header' '$key.k.ncol' '$key.ncol' '$key.match'
            #end if
        #end for

        ## Numerical filters: one --value <threshold> <column> <operator> per repeat.
        ## Each branch reads the float field that belongs to the selected operator.
        #for $val in $value
            #if $val.v.val == "Equal"
                --value '$val.v.equal' '$val.ncol' '='
            #else if $val.v.val == "Higher"
                --value '$val.v.higher' '$val.ncol' '>'
            #else if $val.v.val == "Equal or higher"
                --value '$val.v.equal_higher' '$val.ncol' '>='
            #else if $val.v.val == "Lower"
                --value '$val.v.lower' '$val.ncol' '<'
            #else if $val.v.val == "Equal or lower"
                --value '$val.v.equal_lower' '$val.ncol' '<='
            #else if $val.v.val == "Different"
                --value '$val.v.different' '$val.ncol' '!='
            #end if
        #end for

        ## Range filters: one --values_range <bottom> <top> <column> <inclusive> per repeat.
        ## The former guard around this line tested the loop variable without its
        ## dollar prefix and was always true for an existing repeat, so it is dropped.
        #for $vr in $values_range
            --values_range '$vr.bottom_value' '$vr.top_value' '$vr.ncol' '$vr.inclusive'
        #end for

        ## Optional sorting: only passed when a sort column was entered.
        ## str() forces a plain string comparison on the parameter wrapper.
        #if str($sort_column) != ""
            --sort_col '$sort_column,$reversed_sort'
        #end if

    ]]></command>
    <inputs>
        <!-- Table to filter and whether its first line is a header row. -->
        <param type="data" name="input1" format="txt,tabular" label="Input file" />
        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
        <!-- How several filters are combined when deciding whether a row is filtered. -->
        <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
            <option value="OR" selected="True">OR</option>
            <option value="AND">AND</option>
        </param>

        <!-- Keyword filters: each repeat targets one column of the input file. -->
        <repeat name="keyword" title="Filter by keywords" >
            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
            <!-- NOTE(review): truevalue is "True" while no falsevalue is given, so the
                 unchecked value is Galaxy's default; confirm filter_kw_val.py accepts
                 both spellings. -->
            <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' />
            <!-- Keywords are either pasted into a text field or read from a dataset. -->
            <conditional name="k" >
                <param name="kw" type="select" label="Enter keywords" >
                    <option value="text" selected="true">copy/paste</option>
                    <option value="file">File containing keywords</option>
                </param>
                <when value="text" >
                    <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
                        <!-- Every printable character is accepted except the single quote,
                             which is rewritten to __sq__. This only protects the value when
                             it is wrapped in single quotes on the command line. -->
                        <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                        </sanitizer>
                    </param>
                </when>
                <when value="file" >
                    <param name="file" type="data" format="txt,tabular" label="File containing keywords" />
                    <param name="ncol" type="text" value="c1" label="Specify the column containing keywords" help='For example, fill in "c1" if keywords are in the first column' />
                    <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                </when>
            </conditional>
        </repeat>

        <!-- Numerical filters: one comparison operator and one threshold per repeat.
             Each operator has its own, differently named float field. -->
        <repeat name="value" title="Filter by numerical value" >
            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the numerical values you want to filter on are in the first column' />
            <conditional name="v" >
                <param name="val" type="select" label="Select operator" >
                    <option value="Equal">=</option>
                    <option value="Higher">&gt;</option>
                    <option value="Equal or higher">&gt;=</option>
                    <option value="Lower">&lt;</option>
                    <option value="Equal or lower">&lt;=</option>
                    <option value="Different">!=</option>
                </param>
                <when value="Equal" >
                    <param name="equal" type="float" value="" label="Value" />
                </when>
                <when value="Higher" >
                    <param type="float" name="higher" value="" label="Value" />
                </when>
                <when value="Equal or higher" >
                    <param type="float" name="equal_higher" value="" label="Value" />
                </when>
                <when value="Lower" >
                    <param type="float" name="lower" value="" label="Value" />
                </when>
                <when value="Equal or lower" >
                    <param type="float" name="equal_lower" value="" label="Value" />
                </when>
                <when value="Different">
                    <param type="float" name="different" value="" label="Value"/>
                </when>
            </conditional>
        </repeat>

        <!-- Range filters: a bottom and a top bound on one column per repeat. -->
        <repeat name="values_range" title="Filter by range of numerical values">
            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the numerical values you want to filter on are in the first column' />
            <param name="bottom_value" type="float" value="" label="Enter the bottom value" />
            <param name="top_value" type="float" value="" label="Enter the top value" />
            <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" />
        </repeat>

        <!-- Optional sorting of the result files; an empty column means no sorting. -->
        <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" />
        <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
    </inputs>
    <outputs>
        <!-- Main result dataset, labelled after the tool and the input file name. -->
        <data format="tsv" name="output1" label="${tool.name} on ${input1.name}"/>
        <!-- Companion dataset labelled "Filtered lines"; presumably the rows removed
             by the filters, as described in the help section. -->
        <data format="tsv" name="filtered_file" label="${tool.name} on ${input1.name} - Filtered lines"/>
    </outputs>
    <tests>
        <!-- Single scenario: an exact-match keyword filter on c1 combined (OR) with a
             "higher than 20" numerical filter on c3, results sorted on c1 ascending. -->
        <test>
            <!-- Input table and global options. -->
            <param name="input1" value="Lacombe_et_al_2017_OK.txt"/>
            <param name="header" value="true"/>
            <param name="operator" value="OR"/>

            <!-- Keyword filter: pasted list of ten accession identifiers. -->
            <repeat name="keyword">
                <param name="ncol" value="c1"/>
                <param name="match" value="True"/>
                <conditional name="k">
                    <param name="kw" value="text"/>
                    <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386"/>
                </conditional>
            </repeat>

            <!-- Numerical filter on the third column. -->
            <repeat name="value">
                <param name="ncol" value="c3"/>
                <conditional name="v">
                    <param name="val" value="Higher"/>
                    <param name="higher" value="20"/>
                </conditional>
            </repeat>

            <!-- Sorting options. -->
            <param name="sort_column" value="c1"/>
            <param name="reversed_sort" value="false"/>

            <!-- Expected contents of both output datasets. -->
            <output name="output1" file="output.csv"/>
            <output name="filtered_file" file="filtered_output.csv"/>
        </test>
    </tests>
    <help><![CDATA[
**Description**

This tool filters out rows of data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
A boolean operator (OR/AND) lets you combine different types of filters, making this tool very powerful.

-----

**Input**

A table (file in txt, tab, tsv, csv format) of your identification and/or quantification results for example.
  
-----

**Parameters**

**AND/OR operator**

As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators: 

- OR: only one filter must be satisfied to remove one row
- AND: all filters must be satisfied to remove one row

-----

**Filter by keyword(s)**

Click on the "Filter by keywords" box to use it. You can either fill in the field (copy/paste) or upload a file which contains the keywords. 

"Column number on which to apply the filter": You must then specify the column number of your input file on which to apply the filter by keywords.

- If you choose to fill in the field, the keywords should be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175

- If you choose to upload a file, it must be a text file in which each line is a keyword, for example:

TRYP_PIG

ALDOA_RABBIT

LYSO_ECOLI

Lines that contain these keywords will be removed from the input file.

"Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option:

- If you choose **Yes**, only the lines in which the field contains exactly the same content as a keyword will be removed.

- If you choose **No**, all the lines in which the field contains the keyword will be removed.

Example:

**Yes** option (exact match) selected using the keyword "kinase": only lines which contain exactly the word "kinase" will be removed.

**No** option (partial match) for "kinase": lines which contain "kinase" and lines with "alpha-kinase" (and so on) will be removed.

-----

**Filter by numerical values**: You can filter your data by a column of numerical values. 

"Column number on which to apply the filter": you must specify the column number of your input file on which to apply the filter by numerical value.

Then select one of the operators in the list :

- = (equal)
- != (not equal)
- < (lower than)
- <= (lower than or equal to)
- > (greater than)
- >= (greater than or equal to)

Then enter the numerical threshold to apply by filling the "Value" box. 
For example, if you choose "> 10", each row whose value in the chosen column of your input file is greater than 10 will be filtered out.

-----

**Filter by a range of values**: You can also set a range of values to filter your file.
Unlike the numeric filter, rows with numerical values within the defined range will be kept while rows with values outside this range will be filtered out.

-----

**Sort results files**

You can sort your results by column in ascending (default) or descending order by entering the column number on which to sort the data.

-----

**Output**

The tool returns two output files.

* A text file containing the results that pass your filters

* A text file containing the rows removed from the input file (i.e. containing data that do not pass your filter(s)).

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

    ]]></help>
    <!-- No citation is declared for this tool. -->
    <citations/>
</tool>