view filter_kw_val.xml @ 6:b4641c0f8a82 draft

planemo upload commit 77279e994f5751c6cd9aa165aa0604db3d241271-dirty
author proteore
date Mon, 11 Mar 2019 09:14:42 -0400
parents 33ca9ba2495a
children 5621406a4d2f
line wrap: on
line source

<tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.11">
    <!-- No short description is shown next to the tool name. -->
    <description/>
    <!-- No package requirements are declared for this tool. -->
    <requirements/>
    <!-- Exit codes of 1 and above are matched by this rule. -->
    <stdio>
        <exit_code range="1:"/>
    </stdio>
    <command><![CDATA[
        ## Every substituted value is wrapped in single quotes so that the shell
        ## passes it through verbatim. The pasted keyword text is sanitized so it
        ## can never contain a single quote, which makes single quoting safe even
        ## when keywords contain double quotes, dollar signs or backticks.
        python '$__tool_directory__/filter_kw_val.py'
        -i '$input1,$header'
        -o '$kept_lines'
        --discarded_lines '$discarded_lines'
        --operation '$operation'
        --operator '$operator'

        ## Keyword filters: one --kw or --kw_file option per repeat instance.
        #for $key in $keyword
            #if $key.k.kw == "text"
                --kw '$key.k.txt' '$key.ncol' '$key.match'
            #else if $key.k.kw == "file"
                --kw_file '$key.k.file' '$key.k.header' '$key.k.ncol' '$key.ncol' '$key.match'
            #end if
        #end for

        ## Single-value numerical filters: one --value option per repeat instance.
        #for $val in $value
            #if $val.value != "None"
                --value '$val.value' '$val.ncol' '$val.operator'
            #end if
        #end for

        ## Range filters: one --values_range option per repeat instance.
        #for $vr in $values_range
            --values_range '$vr.bottom_value' '$vr.top_value' '$vr.ncol' '$vr.inclusive'
        #end for

        ## Sorting is only requested when the "Sort by column ?" switch is on.
        #if $sort.sort_bool == "true"
            --sort_col '$sort.sort_column,$sort.reversed_sort'
        #end if

    ]]></command>
    <inputs>
        <!-- Table to filter; passed to the script together with the header flag. -->
        <param type="data" name="input1" format="txt,tabular" label="Input file" />
        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
        <!-- Whether rows matching the filters are kept or discarded. -->
        <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?">
            <option value="keep">Keep</option>
            <option value="discard">Discard</option>
        </param>
        <!-- How several filters are combined; OR is preselected. -->
        <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
            <option value="OR" selected="True">OR</option>
            <option value="AND">AND</option>
        </param>
        
        <!-- Keyword filter; may be added several times. Keywords are either
             pasted into a text field or read from a column of another dataset. -->
        <repeat name="keyword" title="Filter by keywords" >
            <!-- Column of the input file in which the keywords are searched. -->
            <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'>
                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
            </param>
            <!-- Emits "True" when checked and "false" when unchecked; the unchecked
                 value and the unchecked default are spelled out rather than implied. -->
            <param type="boolean" name="match" checked="false" truevalue="True" falsevalue="false" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' />
            <conditional name="k" >
                <param name="kw" type="select" label="Enter keywords" >
                    <option value="text" selected="true">copy/paste</option>
                    <option value="file">File containing keywords</option>
                </param>
                <when value="text" >
                    <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
                        <!-- Any printable character is accepted except the single
                             quote, which is replaced by the token __sq__. -->
                        <sanitizer>
                            <valid initial="string.printable">
                                <remove value="&apos;"/>
                            </valid>
                            <mapping initial="none">
                                <add source="&apos;" target="__sq__"/>
                            </mapping>
                        </sanitizer>
                    </param>
                </when>
                <when value="file" >
                    <param name="file" type="data" format="txt,tabular" label="File containing keywords" />
                    <!-- Column of the keyword file; validated like every other
                         column field of this tool. -->
                    <param name="ncol" type="text" value="c1" label="Specify the column containing keywords" help='For example, fill in "c1" if keywords are in the first column'>
                        <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
                    </param>
                    <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                </when>
            </conditional>
        </repeat>
        <!-- Numerical filter comparing one column against a single threshold;
             may be added several times. -->
        <repeat name="value" title="Filter by numerical value">
            <!-- Column holding the numbers to compare. -->
            <param name="ncol"
                   type="text"
                   value="c1"
                   label="Column number on which to apply the filter"
                   help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
            </param>
            <!-- Comparison operator; the option value is the word handed to the script. -->
            <param name="operator" type="select" label="Select operator">
                <option value="Equal">=</option>
                <option value="Higher">&gt;</option>
                <option value="Equal-or-higher">&gt;=</option>
                <option value="Lower">&lt;</option>
                <option value="Equal-or-lower">&lt;=</option>
                <option value="Different">!=</option>
            </param>
            <!-- Threshold the column values are compared with. -->
            <param name="value" type="float" value="" label="Value"/>
        </repeat>
        <!-- Numerical filter testing whether a column value lies between two
             bounds; may be added several times. -->
        <repeat name="values_range" title="Filter by range of numerical values">
            <!-- Column holding the numbers to test. -->
            <param name="ncol"
                   type="text"
                   value="c1"
                   label="Column number on which to apply the filter"
                   help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
                <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
            </param>
            <!-- Lower and upper bounds of the range. -->
            <param name="bottom_value" type="float" value="" label="Enter the bottom value"/>
            <param name="top_value" type="float" value="" label="Enter the top value"/>
            <!-- Switch for an inclusive range; off by default. -->
            <param name="inclusive" type="boolean" checked="false" truevalue="true" falsevalue="false" label="inclusive range ?"/>
        </repeat>
        <!-- Optional sorting of the result files; the sort column and direction
             are only asked for when the switch is on. -->
        <conditional name="sort">
            <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" />
            <when value="true">
                <!-- Same column pattern as the other column fields of this tool
                     (one or more digits, optionally prefixed with "c"). -->
                <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values">
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
                </param>
                <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
            </when>
            <!-- Nothing to configure when sorting is disabled. -->
            <when value="false"/>
        </conditional>
    </inputs>    
    <!-- Two datasets are produced: one bound to -o and one bound to
         &#45;&#45;discarded_lines on the command line. -->
    <outputs>
        <data name="kept_lines"
              format="tsv"
              label="Filtered_${input1.name}"/>
        <data name="discarded_lines"
              format="tsv"
              label="Filtered_${input1.name} - discarded_lines"/>
    </outputs>
    <tests>
        <!-- Discard mode with two filters combined by OR: a pasted keyword list
             searched in column 1 (exact match) and a "greater than 20" filter on
             column 3. Sorting is switched off, so only the switch itself is set
             inside the "sort" conditional. -->
        <test>
            <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
            <param name="header" value="true" />
            <param name="operator" value="OR"/>
            <param name="operation" value="discard"/>
            <conditional name="sort">
                <param name="sort_bool" value="false"/>
            </conditional>
            <repeat name="keyword">
                <param name="ncol" value="c1" />
                <param name="match" value="True" />
                <conditional name="k">
                    <param name="kw" value="text" />
                    <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386" />
                </conditional>
            </repeat>
            <repeat name="value">
                <param name="ncol" value="c3"/>
                <param name="operator" value="Higher"/>
                <param name="value" value="20" />
            </repeat>
            <output name="kept_lines" file="output.tsv" />
            <output name="discarded_lines" file="discarded_lines.tsv" />
        </test>
    </tests>
    <help><![CDATA[
**Description**

This tool allows you to keep or discard rows from your dataset according to different filters such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
A boolean operator ("OR"/"AND") allows you to combine different types of filters, making this tool very powerful.

-----

**Input**

A table (file in txt, tab, tsv, csv format) of your identification and/or quantification results for example.
  
-----

**Parameters**

**Operation**

- **Keep**: only keep lines with keyword(s) and/or value(s) concerned by defined filter(s) 
- **Discard**: only keep lines with keyword(s) and/or value(s) NOT concerned by defined filter(s)

.. class:: infomark

Two output files are created, one with kept lines and the other one with discarded lines. 

**Select an operator to combine your filters (if more than one)**

Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators: 

- **OR**: at least one of the filters must be satisfied to keep/discard a row
- **AND**: all filters must be satisfied to keep/discard a row

-----

**Filter by keyword(s)**

Click on the "Filter by keywords" box to use it. You can either fill in the field (copy/paste) or upload a file which contains the keywords. 

"Column number on which to apply the filter": You must then specify the column number of your input file on which to apply the filter by keywords.

- If you choose to fill in the field, the keywords should be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175

- If you choose to upload a file, use a text format in which each line is a keyword, for example:

TRYP_PIG

ALDOA_RABBIT

LYSO_ECOLI

Lines that contain these keywords will be removed from the input file (when using the "discard" mode).

"Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option:

- If you choose **Yes**, only the fields that contains exactly the same content will be removed (i.e. using the "discard" mode).

- If you choose **No**, all the fields containing the keyword will be removed.

Example:

**Yes** option (exact match) selected using the keyword "kinase": only lines which contain exactly the word "kinase" will be removed.

**No** option (partial match) for "kinase": lines which contain "kinase" and lines with "alpha-kinase" (and so on) will be removed.

-----

**Filter by numerical values**: You can filter your data by a column of numerical values. 

"Column number on which to apply the filter": you must specify the column number of your input file on which to apply the filter by numerical value.

Then select one of the operators in the list :

- = (equal)
- != (not equal)
- < (lower than)
- <= (lower than or equal to)
- > (greater than)
- >= (greater than or equal to)

Then enter the numerical threshold to apply by filling the "Value" box. 
For example, if you choose > 10, each row whose numerical value (in the chosen column of your input file) is greater than 10 will be kept or discarded (depending on the operation parameter).

-----

**Filter by a range of values**: You can also set a range of values to filter your file.
Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter).

-----

**Sort by column ?**
Clicking on the "Yes" button allows you to "Sort result files by:" a column number. This can be done in ascending (default) or descending order; enter the column number on which to sort the data.

-----

**Output**

The tool returns two output files.

* A text file containing the results that satisfy your filters (i.e. "keep" mode).

* A text file containing the rows removed from the input file (i.e. "discard" mode).

-----

.. class:: infomark

**Authors**

David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

    ]]></help>
    <citations>
    </citations>
</tool>