diff galaxy_stubs/FingerprintSimilaritySearch.xml @ 2:605370bc1def draft default tip

Uploaded
author luis
date Tue, 12 Jul 2016 12:33:33 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy_stubs/FingerprintSimilaritySearch.xml	Tue Jul 12 12:33:33 2016 -0400
@@ -0,0 +1,114 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTD2Galaxy.-->
+<!--Proposed Tool Section: [Chemoinformatics]-->
+<tool id="FingerprintSimilaritySearch" name="FingerprintSimilaritySearch" version="1.1.0">
+  <description>calculate similar molecules in a library</description>
+  <macros>
+    <token name="@EXECUTABLE@">FingerprintSimilaritySearch</token><!-- token holding the executable name; the command block below spells the name out literally -->
+    <import>macros.xml</import><!-- presumably defines the stdio, requirements and advanced_options macros expanded in this file; verify against macros.xml -->
+  </macros>
+  <expand macro="stdio"/>
+  <expand macro="requirements"/>
+  <command>FingerprintSimilaritySearch
+
+#if $param_t:
+  -t '$param_t'  ## dataset paths are single-quoted so the shell never splits or expands them
+#end if
+#if $param_q:
+  -q '$param_q'
+#end if
+#if $param_o:
+  -o '$param_o'
+#end if
+#if $param_f:
+  -f $param_f
+#end if
+#if str($param_fp_col) != '':
+  -fp_col $param_fp_col
+#end if
+#if str($param_id_col) != '':
+  -id_col $param_id_col
+#end if
+#if $param_fp_tag:
+  -fp_tag     '$param_fp_tag'  ## single quotes: the param sanitizer strips quote characters but still lets dollar signs and backticks through
+#end if
+#if $param_id_tag:
+  -id_tag     '$param_id_tag'
+#end if
+#if str($param_tc) != '':
+  -tc $param_tc  ## tested as a string so that a cutoff of 0.0 is forwarded instead of being treated as unset
+#end if
+#if $param_nt:
+  -nt     '$param_nt'
+#end if
+#if $param_bs:
+  -bs $param_bs
+#end if
+#if $param_sdf_out:
+  -sdf_out $param_sdf_out
+#end if
+</command>
+  <inputs><!-- one param per command line option; the option name is repeated in each help attribute -->
+    <param name="param_t" type="data" format="smi.gz,csv,sdf.gz,sdf,txt.gz,smi,txt,csv.gz" optional="False" label="Target library input file" help="(-t) "/>
+    <param name="param_q" type="data" format="smi.gz,csv,sdf.gz,sdf,txt.gz,smi,txt,csv.gz" optional="False" label="Query library input file" help="(-q) "/>
+    <param name="param_f" type="integer" min="1" max="2" optional="False" value="1" label="Fingerprint format [1 = binary bitstring, 2 = comma separated feature list]" help="(-f) "/><!-- default kept inside [min, max]; 1 is the format used by the examples in the help text -->
+    <param name="param_fp_col" type="integer" value="-1" label="Column number for comma separated smiles input which contains the fingerprint" help="(-fp_col) "/>
+    <param name="param_id_col" type="integer" value="-1" label="Column number for comma separated smiles input which contains the molecule identifier" help="(-id_col) "/>
+    <param name="param_fp_tag" type="text" size="30" value=" " label="Tag name for SDF input which contains the fingerprint" help="(-fp_tag) ">
+      <sanitizer><!-- accepts printable characters except single and double quotes -->
+        <valid initial="string.printable">
+          <remove value="'"/>
+          <remove value="&quot;"/>
+        </valid>
+      </sanitizer>
+    </param>
+    <param name="param_id_tag" type="text" size="30" value=" " label="Tag name for SDF input which contains the molecule identifier" help="(-id_tag) ">
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="'"/>
+          <remove value="&quot;"/>
+        </valid>
+      </sanitizer>
+    </param>
+    <param name="param_tc" type="float" value="0.7" label="Tanimoto cutoff [default: 0.7]" help="(-tc) "/>
+    <param name="param_nt" type="text" size="30" value="1" label="Number of parallel threads to use" help="(-nt) To use all possible threads enter &lt;max&gt; [default: 1]">
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="'"/>
+          <remove value="&quot;"/>
+        </valid>
+      </sanitizer>
+    </param>
+    <param name="param_bs" type="integer" value="500" label="Block size [default: 500]" help="(-bs) "/>
+    <param name="param_sdf_out" type="integer" min="0" max="1" optional="True" value="0" label="If query file has SD format, this flag activates writing of nearest neighbours as a new CSV tag in a copy of the query SD file" help="(-sdf_out) "/>
+  </inputs>
+  <expand macro="advanced_options"/><!-- NOTE(review): expanded outside the inputs element; confirm the macro content is valid at tool level -->
+  <outputs>
+    <data name="param_o" metadata_source="param_t" format="input"/><!-- result dataset; its path is handed to the executable through the -o option in the command block -->
+  </outputs>
+  <help>This tool calculates all nearest neighbours above a similarity cutoff for given query molecules in a compound library on the basis of 2D binary fingerprints.
+The first library to specify (i1, option -t) is the compound library to be searched, the second library (i2, option -q) is considered as the query compounds.
+Both files have to be comma separated values (csv) files and the binary fingerprints have to be encoded as feature lists or as binary bit strings.
+
+WARNING: If similarity cutoff is chosen to be 0.0, the output will be the entire similarity matrix and has a size of n*m with n=|i1| and m=|i2|. 
+
+======================================================================================================================================================
+
+Examples:
+
+$ FingerprintSimilaritySearch -t target.sdf -q query.sdf -o results -fp_tag FPRINT -f 1 -id_tag NAME
+  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf and query.sdf.
+  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7 are written to outfile (results).
+
+$ FingerprintSimilaritySearch -t target.sdf -q query.sdf -o results -fp_tag FPRINT -f 1 -id_tag NAME -sdf_out
+  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf and query.sdf.
+  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7
+  are added as a new SD tag to output file 'NN_TAGGED_query.sdf' as a list of TargetID:Similarity pairs.
+
+$ FingerprintSimilaritySearch -t target.sdf -q query.smi -o results -fp_tag FPRINT -f 1 -id_tag NAME -fp_col 2
+  tries to extract fingerprints as binary bitstrings (-f 1) from tag &lt;FPRINT&gt; and compound IDs from tag &lt;NAME&gt; of target.sdf
+  and fingerprints as binary bitstrings of space separated query file from column 2 (-fp_col 2).
+  A similarity search is performed for all query molecules against all target molecules and pairs with similarity above Tanimoto cutoff 0.7 are written to outfile (results).
+
+</help>
+</tool>