Mercurial > repos > bgruening > ctb_im_max_min_picker

<tool id="ctb_im_max_min_picker" name="Pick" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
    <description>diverse compounds from a library with Butina clustering</description>
    <!-- NOTE(review): the command executed by this tool is max_min_picker; confirm that "Butina clustering" is the right wording for the description above. -->
    <!--
        macros.xml is expected to supply the "requirements", "outputs" and
        "citations" macros expanded in this file and, presumably, the
        TOOL_VERSION token used in the tool version attribute (it is not
        defined here). GALAXY_VERSION is the wrapper revision appended to the
        tool version after "+galaxy".
    -->
    <macros>
        <import>macros.xml</import>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <expand macro="requirements" />
    <!--
        Runs max_min_picker on the input SDF and appends everything it prints
        (stdout and stderr) to the log output. Afterwards the contents of
        outp_metrics.txt are appended to the same log and outp.sdf.gz is
        decompressed in the working directory.
        The -n and -s options are only passed when the corresponding optional
        parameter has a value. The optional integer is tested through its
        string form, and the log path is quoted like every other path.
    -->
    <command detect_errors="exit_code"><![CDATA[
        max_min_picker
            -i '$infile'
            -if sdf
            -t '$threshold'
            -d '$descriptor_opts'
            #if str($number) != ''
                -n $number
            #end if
            --fragment-method '$fragment_opts'
            #if $seedfile
                -s '$seedfile'
            #end if
            $out_fragment
            --meta
            -o outp
            -of sdf &>> '$logfile' &&
        cat outp_metrics.txt &>> '$logfile' &&
        gzip -d outp.sdf.gz
    ]]></command>
    <!--
        User-facing parameters. Each one is referenced by name in the command
        template: infile, threshold, descriptor_opts, number, fragment_opts,
        seedfile and out_fragment. "number" and "seedfile" are optional and
        their command-line options are omitted when they are left empty.
    -->
    <inputs>
        <param name="infile" type="data" format="sdf" label="Input file" help="Input file in SDF format"/>
        <param name="threshold" type="float" label="Similarity threshold" value="0.7" min="0" max="1" help="1.0 means identical"/>
        <param name="descriptor_opts" type="select" label="Type of descriptor or fingerprint" help="Default is morgan2">
            <option value="maccs">maccs</option>
            <option value="morgan2" selected="true">morgan2</option>
            <option value="morgan3">morgan3</option>
        </param>
        <param name="number" type="integer" optional="true" label="Maximum number to pick for diverse subset selection" help="Upper limit on the number of molecules picked. Leave empty to not pass a maximum to the picker."/>
        <param name="fragment_opts" type="select" label="Fragment" help="Approach to find biggest fragment, if more than one is found">
            <option value="hac" selected="true">Biggest by heavy atom count</option>
            <option value="mw">Biggest by molecular weight</option>
        </param>
        <param name="seedfile" type="data" format="sdf" optional="true" label="Seed file" help="Optional file containing any seed molecules that have already been picked"/>
        <param name="out_fragment" type="boolean" label="Output fragment" truevalue="--output-fragment" falsevalue="" help="Output the biggest fragment rather than the original molecule"/>
    </inputs>

    <!-- Output datasets are declared by the "outputs" macro, which is not defined in this file. The command appends to $logfile, and the tests check outputs named "outfile" and "logfile". -->
    <expand macro="outputs" />

    <tests>
        <!-- Single regression test: morgan2 fingerprints, heavy-atom-count fragment selection, threshold 0.5. -->
        <test>
            <param name="infile" ftype="sdf" value="Kinase_inhibs.sdf"/>
            <param name="threshold" value="0.5"/>
            <param name="descriptor_opts" value="morgan2"/>
            <param name="fragment_opts" value="hac"/>
            <!-- The SDF output must match a pattern of 33 records, each ending in the $$$$ delimiter. -->
            <output name="outfile" ftype="sdf">
                <assert_contents>
                    <has_text_matching expression="^([^\$]+?\$\$\$\$){33}?$"/>
                </assert_contents>
            </output>
            <!-- The log must contain both the candidate count line and the output count line. -->
            <output name="logfile">
                <assert_contents>
                    <has_text text="36 candidates 36 total"/>
                    <has_text text="Output 33 molecules"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[

.. class:: infomark

**What this tool does**

This tool selects a subset from a library, ensuring the subset reflects the diversity of the library overall by applying Butina clustering.


-----

.. class:: infomark

**Input**

| - Compounds in `SDF Format`_
| - Various options: similarity threshold, type of fingerprint, seed molecules.

.. _SDF Format: http://en.wikipedia.org/wiki/Chemical_table_file


-----

.. class:: infomark

**Output**

SD-file containing picked compounds.

]]></help>
    <expand macro="citations" />
</tool>
author	bgruening
date	Mon, 27 Jul 2020 11:32:34 -0400
parents	2d9ca306538d
children