Mercurial > repos > bgruening > ctb_im_cluster_butina
diff cluster_butina.xml @ 0:ad2b25cca758 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/im-pipelines commit 598dd288a384481b7602a0f6322c5081dc8da5d9"
author | bgruening |
---|---|
date | Tue, 21 Jul 2020 05:23:57 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cluster_butina.xml Tue Jul 21 05:23:57 2020 -0400 @@ -0,0 +1,123 @@ +<tool id="ctb_im_cluster_butina" name="Butina Cluster" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@"> + <description>using RDKit</description> + <macros> + <import>macros.xml</import> + <token name="@GALAXY_VERSION@">0</token> + </macros> + <expand macro="requirements" /> + <command detect_errors="exit_code"><![CDATA[ + cluster_butina + -i '$infile' + -if sdf + -t '$threshold' + -d '$descriptor_opts' + -m '$metric_opts' + #if $number != '' + -n '$number' + #end if + -e '$exclude' + #if $fragment_opts: + --fragment-method '$fragment_opts' + #end if + #if $field_conditional.use_field == 't': + #if $field_conditional.field: + -f '$field_conditional.field' + $field_conditional.field_opts + #end if + #end if + $output_fragment + --meta + -o outp + -of sdf &>> $logfile && + cat outp_metrics.txt &>> $logfile && + gzip -d outp.sdf.gz + ]]></command> + <inputs> + <param name="infile" type="data" format="sdf" label="Input file" help="Input file in SDF format"/> + <param name="threshold" min="0" max="1" type="float" value="0.7" label="Threshold" help="Similarity clustering threshold (1.0 means identical)"/> + <param name="descriptor_opts" type="select" label="Descriptor Options" help="Descriptor or fingerprint type (default rdkit)"> + <option value="rdkit" selected="true">rdkit</option> + <option value="maccs">maccs</option> + <option value="morgan2">morgan2</option> + <option value="morgan3">morgan3</option> + </param> + <param name="metric_opts" type="select" label="Similarity metric" help="Default is the Tanimoto coefficient"> + <option value="tanimoto" selected="true">tanimoto</option> + <option value="asymmetric">asymmetric</option> + <option value="braunblanquet">braunblanquet</option> + <option value="cosine">cosine</option> + <option value="dice">dice</option> + <option value="kulczynski">kulczynski</option> + <option value="mcconnaughey">mcconnaughey</option> + <option value="rogotgoldberg">rogotgoldberg</option> + <option value="russel">russel</option> + <option value="sokal">sokal</option> + </param> + <param name="number" type="integer" optional="true" label="Maximum number for diverse subset selection" help="Maximum number to pick for diverse subset selection. Leave blank for no maximum."/> + <param name="exclude" min="0" max="1" type="float" value="0.9" label="Exclude threshold" help="Threshold for excluding structures in diverse subset selection (1.0 means identical)"/> + <param name="fragment_opts" type="select" optional="true" label="Find single fragment" help="Find single fragment if more than one"> + <option value="hac" selected="true">Biggest by heavy atom count</option> + <option value="mw">Biggest by molecular weight</option> + </param> + <param name="output_fragment" type="boolean" label="Output Fragment" truevalue="--output-fragment" falsevalue="" help="Output the biggest fragment rather than the original molecule"/> + <conditional name="field_conditional"> + <param name="use_field" type="boolean" label="Use field" truevalue="t" falsevalue="f" help="Use a field to optimize diverse subset selection"/> + <when value="t"> + <param name="field" type="text" label="Field" help="Field to use to optimise diverse subset selection"/> + <param name="field_opts" type="select" label="Field options" help="pick lowest or highest value specified by the 'field' component"> + <option value="--min" selected="true">min</option> + <option value="--max">max</option> + </param> + </when> + <when value="f" /> + </conditional> + </inputs> + + <expand macro="outputs" /> + <tests> + <test> + <param name="infile" value="Kinase_inhibs.sdf" ftype="sdf"/> + <param name="descriptor_opts" value="rdkit"/> + <param name="metric_opts" value="tanimoto"/> + <param name="threshold" value="0.6"/> + <param name="fragment_opts" value="hac"/> + <output name="outfile" ftype='sdf' file="cluster_butina_output1.sdf"/> + <output name="logfile"> + <assert_contents> + <has_text text="Found 30 clusters"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + +.. class:: infomark + +**What this tool does** + +This tool performs Butina clustering for a set of input molecules, using the chemistry toolkit RDKit, and returns results in SDF format. + + +----- + +.. class:: infomark + +**Input** + +| - Molecules in `SDF format`_ +| - A number of other parameters can be set, including the fingerprint type and the similarity metric to use. + +.. _SDF format: http://en.wikipedia.org/wiki/Chemical_table_file + + +----- + +.. class:: infomark + + **Output** + +SD file containing clusters. + + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file