<!--
    Galaxy wrapper for fingerprint similarity searches.

    Two back ends are selectable through the "method_opts" conditional:
      * chemfp     -> runs `simsearch` on FPS files (query vs. targets, or NxN on the targets)
      * obabel     -> runs `obabel` against an Open Babel FastSearch index (molecule.fs)
    The single output "outfile" is declared as tabular.
-->
<tool id="ctb_simsearch" name="Similarity Search" version="0.1.1">
    <description>of fingerprint data sets</description>
    <requirements>
        <requirement type="package" version="1.1p1">chemfp</requirement>
        <requirement type="package" version="2.3.2">openbabel</requirement>
    </requirements>
    <!-- CDATA keeps shell metacharacters such as "2>&1" from breaking XML well-formedness. -->
    <command><![CDATA[
        #if $method_opts.method_opts_selector == "chemfp":
            simsearch
            #if int($method_opts.knn) == 0:
                ## knn == 0 is the UI convention for "report all neighbors"
                #set $k = 'all'
                ## count is only available if k nearest neighbor search is disabled
                $method_opts.counts
            #else:
                #set $k = int($method_opts.knn)
            #end if

            -k $k

            --threshold $method_opts.threshold
            --query-format fps
            --target-format fps
            -o "${outfile}"

            ## build and search an in-memory data structure (faster for multiple queries)
            --memory

            #if $method_opts.query_opts.query_opts_selector == "normal":
                -q "${method_opts.query_opts.query}"
            #else:
                --NxN
            #end if

            "${method_opts.query_opts.targets}"
        #else:
            ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that.
            ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype.
            ## A workaround is to create a symlink with a proper file-extension.
            #import tempfile
            #set $temp_file = tempfile.NamedTemporaryFile()
            #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext)
            ## Only the unique name is needed; close the handle without emitting anything into the command line.
            #silent $temp_file.close()
            ln -s "${method_opts.query}" "${temp_link}";
            obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&1;
            ## ';' (not '&&') so the symlink is removed even when obabel fails
            rm "${temp_link}"
        #end if
    ]]></command>
    <inputs>

        <conditional name="method_opts">
            <param name="method_opts_selector" type="select" label="Subject database/sequences">
                <option value="chemfp">Chemfp fingerprint file</option>
                <option value="obabel">OpenBabel Fastsearch Index</option>
            </param>
            <when value="chemfp">
                <conditional name="query_opts">
                    <param name="query_opts_selector" type="select" label="Query Mode">
                        <option value="normal">Query molecules are stored in a separate file</option>
                        <option value="nxn">Target molecules are also queries (NxN)</option>
                    </param>
                    <when value="normal">
                        <param name='query' type='data' format="fps" label='Query molecules'/>
                        <param name='targets' type='data' format="fps" label='Target molecules'/>
                    </when>
                    <when value="nxn">
                        <param name='targets' type='data' format="fps" label='Target molecules'/>
                    </when>
                </conditional>
                <param name='knn' type='integer' value='0' label='select the k nearest neighbors' help='0 means all neighbors'>
                    <validator type="in_range" min="0" />
                </param>
                <param name='threshold' type='float' value='0.7' label='threshold' />
                <param name="counts" type="boolean" truevalue="-c" falsevalue="" checked="false" label="report counts (-c)" help="Is ignored if k nearest neighbor search is enabled" />
            </when>
            <when value="obabel">
                <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/>
                <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/>
                <param name="threshold" type='float' label="threshold" value='0.7'/>
            </when>
        </conditional>

    </inputs>
    <outputs>
        <data name="outfile" format="tabular" />
    </outputs>
    <tests>
        <!--
            chemfp back end, separate query file, knn=4, threshold=0.7.
            The parameter names must match the declared inputs ("knn", "threshold");
            the previous names "k" and "th" matched nothing, so defaults were used.
            NOTE(review): confirm the expected file was generated with knn=4; regenerate it otherwise.
            The expected file name (including its "tragets" spelling) is kept as-is because it
            has to match the file shipped in test-data.
        -->
        <test>
            <param name="method_opts_selector" value="chemfp"/>
            <param name="query_opts_selector" value="normal"/>
            <param name="targets" ftype="fps" value="targets.fps"/>
            <param name="query" ftype="fps" value="q.fps"/>
            <param name="knn" value="4"/>
            <param name="threshold" value="0.7"/>
            <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
        </test>
    </tests>
    <help>


.. class:: infomark

**What this tool does**

Similarity searches over a variety of different fingerprints, using either the chemfp_ FPS type or the Open Babel FastSearch_ index.

.. _chemfp: http://chemfp.com/
.. _FastSearch: http://openbabel.org/wiki/FastSearch

-----

.. class:: infomark

**Cite**

| The chemfp_ project
|
| N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison - `Open Babel: An open chemical toolbox`_

.. _`Open Babel: An open chemical toolbox`: http://www.jcheminf.com/content/3/1/33


    </help>
</tool>