view filter_kw_val.xml @ 7:6f32c1e12572 draft default tip

planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
author proteore
date Fri, 01 Jun 2018 11:10:47 -0400
parents 1e9911190142
children
line wrap: on
line source

<tool id="MQoutputfilter" name="Filter by keywords or numerical value" version="0.1.0">
    <description></description>
    <!-- No packages are declared here; the command section calls "python" directly. -->
    <requirements>
    </requirements>
    <!-- Exit-code rule covering every exit status of 1 or higher.
         NOTE(review): no "level" attribute is given, so the Galaxy default applies; confirm it is the intended severity. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        Command line for filter_kw_val.py. The fixed arguments come first; every
        "Filter by keywords", "Filter by value" and "Filter by range of values"
        repeat entry then appends one option, followed by the optional sort column.
    -->
    <command><![CDATA[
        python "$__tool_directory__/filter_kw_val.py"
        -i "$input1,$header"
        -o "$output1"
        --filtered_file "$filtered_file"
        --operator "$operator"

        ## Keyword filters: each entry passes the keywords (or a keyword file), the column and the exact-match flag.
        ## The "---" choice (value "None") matches neither branch and therefore adds nothing.
        #for $key in $keyword
            #if $key.k.kw == "text"
                ## Single quotes on purpose: the sanitizer of this parameter strips single quotes but lets
                ## double quotes, dollar signs, backticks and backslashes through, and the shell would
                ## interpret those inside double quotes.
                --kw '$key.k.txt' "$key.ncol" "$key.match"
            #else if $key.k.kw == "file"
                --kw_file "$key.k.file" "$key.ncol" "$key.match"
            #end if
        #end for

        ## Value filters: each entry passes the threshold, the column and the comparison operator.
        #for $val in $value
            #if $val.v.val != "None"
                --value
                #if $val.v.val == "Equal"
                    "$val.v.equal" "$val.ncol" "="
                #else if $val.v.val == "Higher"
                    "$val.v.higher" "$val.ncol" ">"
                #else if $val.v.val == "Equal or higher"
                    "$val.v.equal_higher" "$val.ncol" ">="
                #else if $val.v.val == "Lower"
                    "$val.v.lower" "$val.ncol" "<"
                #else if $val.v.val == "Equal or lower"
                    "$val.v.equal_lower" "$val.ncol" "<="
                #else
                    ## Only "Different" is left at this point.
                    "$val.v.different" "$val.ncol" "!="
                #end if
            #end if
        #end for

        ## Range filters: bottom value, top value, column and inclusive flag for each entry.
        #for $vr in $values_range
            --values_range "$vr.bottom_value" "$vr.top_value" "$vr.ncol" "$vr.inclusive"
        #end for

        ## Sorting is requested only when a column was actually entered (blank or whitespace-only means no sort).
        #if str($sort_column).strip() != ""
            --sort_col "$sort_column,$reversed_sort"
        #end if

    ]]></command>
    <inputs>
        <!-- Dataset to filter and whether its first line is a header. -->
        <param name="input1" type="data" format="txt,tabular"
               label="Input file"
               help="Input file is a tab-delimited file containing proteomics identification and/or quantitative results" />
        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false"
               label="Does your input file contain header?" />

        <!-- How the individual filters defined below are combined. -->
        <param name="operator" type="select"
               label="Please select your operator for your filters"
               help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row">
            <option value="OR" selected="True">OR</option>
            <option value="AND">AND</option>
        </param>

        <!-- Optional sorting of the result files; an empty column means no sorting is requested. -->
        <param name="sort_column" type="text" value=""
               label="If you want to sort the result files by values from a column, please enter a column number"
               help="For example : fill in 'c1' if you want to sort your result file by the column 1 values." />
        <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false"
               label="Sort in descending order ?" />
        <!-- Keyword filter: a column, an exact/partial match switch, and keywords typed in or read from a dataset. -->
        <repeat name="keyword" title="Filter by keywords">
            <param name="ncol" type="text" value="c1"
                   label="Please specify the column number of the input file on which you want to apply the filter"
                   help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' />
            <param name="match" type="boolean" truevalue="True"
                   label="Would you like to search for exact match?"
                   help='Choosing "Yes" will only filter out exact match (i.e. case sensitive), see below for more details' />
            <conditional name="k">
                <!-- The parameter name ("kw") is derived from the argument attribute. -->
                <param argument="--kw" type="select" label="Filter by keyword">
                    <option value="None" selected="True">---</option>
                    <option value="text">Enter keywords (copy/paste)</option>
                    <option value="file">Choose a file containing keywords</option>
                </param>
                <when value="None" />
                <when value="text">
                    <param name="txt" type="text"
                           label="Copy/paste keywords to be filtered out"
                           help='Keywords should be separated by ";", for example: A8K2U0;Q5TA79;O43175'>
                        <!-- Every printable character is accepted except the single quote, which is rewritten to __sq__. -->
                        <sanitizer>
                            <valid initial="string.printable">
                                <remove value="&apos;"/>
                            </valid>
                            <mapping initial="none">
                                <add source="&apos;" target="__sq__"/>
                            </mapping>
                        </sanitizer>
                    </param>
                </when>
                <when value="file">
                    <param name="file" type="data" format="txt,tabular" label="Choose a file containing keywords" />
                </when>
            </conditional>
        </repeat>
        <!-- Value filter: a column, a comparison operator and the number the column is compared against. -->
        <repeat name="value" title="Filter by value">
            <!-- The help text refers to values (not keywords) because this filter is numerical. -->
            <param name="ncol" type="text" value="c1"
                   label="Please specify the column number of the input file on which you want to apply the filter"
                   help='For example, fill in "c1" if the values you want to filter on are listed in the first column' />
            <conditional name="v">
                <!-- The parameter name ("val") is derived from the argument attribute. -->
                <param argument="--val" type="select" label="Filter by value">
                    <option value="None">---</option>
                    <option value="Equal">=</option>
                    <option value="Higher">&gt;</option>
                    <option value="Equal or higher">&gt;=</option>
                    <option value="Lower">&lt;</option>
                    <option value="Equal or lower">&lt;=</option>
                    <option value="Different">!=</option>
                </param>
                <!-- "---" selected: this entry defines no filter. -->
                <when value="None" />
                <when value="Equal">
                    <param name="equal" type="float" value="" label="Value" />
                </when>
                <when value="Higher">
                    <param name="higher" type="float" value="" label="Value" />
                </when>
                <when value="Equal or higher">
                    <param name="equal_higher" type="float" value="" label="Value" />
                </when>
                <when value="Lower">
                    <param name="lower" type="float" value="" label="Value" />
                </when>
                <when value="Equal or lower">
                    <param name="equal_lower" type="float" value="" label="Value" />
                </when>
                <when value="Different">
                    <param name="different" type="float" value="" label="Value" />
                </when>
            </conditional>
        </repeat>
        <!-- Range filter: a column plus the bottom and top bounds, with an option to include the bounds themselves. -->
        <repeat name="values_range" title="Filter by range of values">
            <!-- The help text refers to values (not keywords) because this filter is numerical. -->
            <param name="ncol" type="text" value="c1"
                   label="Please specify the column number of the input file on which you want to apply the filter"
                   help='For example, fill in "c1" if the values you want to filter on are listed in the first column' />
            <param name="bottom_value" type="float" value="" label="Please enter the bottom value" />
            <param name="top_value" type="float" value="" label="Please enter the top value" />
            <param name="inclusive" type="boolean" checked="false" truevalue="true" falsevalue="false"
                   label="inclusive range ?" />
        </repeat>
    </inputs>
    <!-- Two tabular datasets: output1 is passed to the script with -o, filtered_file with the filtered_file option
         (labelled "Filtered lines"). -->
    <outputs>
        <data name="output1" format="tabular" label="${tool.name} on ${input1.name}" />
        <data name="filtered_file" format="tabular" label="${tool.name} on ${input1.name} - Filtered lines" />
    </outputs>
    <!-- Single functional test: OR operator, ascending sort on c1, one text keyword filter on c1 (exact match)
         and one "Higher than 20" value filter on c3; both outputs are compared with reference files. -->
    <tests>
        <test>
            <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
            <param name="header" value="true" />
            <param name="operator" value="OR"/>
            <param name="sort_column" value="c1"/>
            <param name="reversed_sort" value="false"/>
            <repeat name="keyword">
                <param name="ncol" value="c1" />
                <param name="match" value="True" />
                <conditional name="k">
                    <param name="kw" value="text" />
                    <param name="txt" value="P04264;P35908;P13645;Q5D862;Q5T749;Q8IW75;P81605;P22531;P59666;P78386" />
                </conditional>
            </repeat>
            <repeat name="value">
                <param name="ncol" value="c3"/>
                <conditional name="v">
                    <param name="val" value="Higher"/>
                    <param name="higher" value="20" />
                </conditional>
            </repeat>
            <output name="output1" file="output.csv" />
            <output name="filtered_file" file="filtered_output.csv" />
        </test>
    </tests>
    <help><![CDATA[
This tool removes unneeded data (e.g. contaminants, non-significant values) from a proteomics results file (e.g. MaxQuant or Proline output).

**Filter by keyword(s)**

Several options can be used. For each option, you can fill in the field or upload a file which contains the keywords.

- If you choose to fill in the field, the keywords should be separated by ";", for example: A8K2U0;Q5TA79;O43175

- If you choose to upload a file, it must be a text file in which each line is a keyword, for example:

REV

TRYP_PIG

ALDOA_RABBIT

**Lines that contain these keywords will be removed from the input file.**

**Keywords search can be applied by performing either exact match or partial one by using the following option**

- If you choose **Yes**, only the fields that contain exactly the same content will be removed.

- If you choose **No**, all the fields containing the keyword will be removed.

For example:

**Yes** option (exact match) selected using the keyword "kinase": only lines which contain exactly "kinase" are removed.

**No** option (partial match) for "kinase": not only lines which contain "kinase" but also lines with "alpha-kinase" (and so on) are removed.

-----

**Filter by values**

You can filter your data by a column of numerical values.
Enter the column to be used and select one operator from the list:

- "="
- "!="
- "<"
- "<="
- ">"
- ">="

Then enter the value to filter and specify the column to apply that option.
If a row contains a value that corresponds to your settings, it will be filtered out.

-----

**Filter by a range of values**

You can also set a range of values to filter your file.
Unlike the value filter, rows with values inside the defined range are kept.

Rows with values outside of the defined range will be filtered.

-----

**AND/OR operator**

Since you can add as many filters as you want, you can choose how filters apply on your data.

AND or OR operator option works on all filters :

- OR : a row is removed as soon as one filter is satisfied
- AND : a row is removed only if all filters are satisfied

-----

**Sort the results files**

You can sort the result file if you wish, it can help you to check results. 

In order to do so : enter the column to be used, all columns will be sorted according to the one filled in.

Rows stay intact; only their order changes, as when sorting in a spreadsheet.
You can also choose ascending or descending order; ascending order is the default.

-----

**Output**

The tool will produce 2 output files.

* A text file containing the resulting filtered input file.

* A text file containing the rows removed from the input file.

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

    ]]></help>
    <citations>
    </citations>
</tool>