annotate hhsearch.xml @ 6:cec2aa4d6c0d draft default tip

"planemo upload commit c335621f41bfb23eb31f71f5e2b2191727eccf9a"
author guerler
date Sun, 27 Sep 2020 17:08:05 +0000
parents 3e4d88784254
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
1 <tool id="hhsearch" name="HHsearch" version="0.1.0" python_template_version="3.5">
6
cec2aa4d6c0d "planemo upload commit c335621f41bfb23eb31f71f5e2b2191727eccf9a"
guerler
parents: 5
diff changeset
2 <description>detecting remote homologues of proteins</description>
5
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
3 <requirements>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
4 <requirement type="package" version="3.2.0">hhsuite</requirement>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
5 </requirements>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
6 <command detect_errors="exit_code"><![CDATA[
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
7 ln -s '$hhm_ffdata' hhdb_hhm.ffdata &&
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
8 ln -s '$hhm_ffindex' hhdb_hhm.ffindex &&
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
9 ln -s '$cs219_ffdata' hhdb_cs219.ffdata &&
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
10 ln -s '$cs219_ffindex' hhdb_cs219.ffindex &&
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
11 $method -e '$e' -i '$input' -d hhdb -o '$output'
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
12 ]]></command>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
13 <inputs>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
14 <param format="data" name="input" type="data" label="Query Sequence" help="Single sequence or multiple sequence alignment (MSA)
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
15 in a3m, a2m, or FASTA format, or HMM in hhm format. (-i)"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
16 <param format="txt" name="hhm_ffindex" type="data" label="HHM Index file" help="Database file ending with 'hhm.ffindex'."/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
17 <param format="data" name="hhm_ffdata" type="data" label="HHM Data file" help="Database file ending with 'hhm.ffdata'."/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
18 <param format="txt" name="cs219_ffindex" type="data" label="cs219 Index file" help="Database file ending with 'cs219.ffindex'."/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
19 <param format="data" name="cs219_ffdata" type="data" label="cs219 Data file" help="Database file ending with 'cs219.ffdata'."/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
20 <param name="method" type="select" display="radio" label="Search Method" help="Select a search method. See help below for more information.">
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
21 <option value="hhsearch" selected="true">HHsearch</option>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
22 <option value="hhblits">HHblits</option>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
23 </param>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
24 <param name="e" type="float" label="E-value cutoff for inclusion in result alignment. (-e)" value="0.001" min="0" max="1"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
25 </inputs>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
26 <outputs>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
27 <data format="txt" name="output" />
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
28 </outputs>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
29 <tests>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
30 <test>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
31 <param name="method" value="hhblits"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
32 <param name="input" value="6VYB_A.fasta"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
33 <param name="hhm_ffindex" value="dbCAN-fam-V8/dbCAN-fam-V8_hhm.ffindex"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
34 <param name="hhm_ffdata" value="dbCAN-fam-V8/dbCAN-fam-V8_hhm.ffdata"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
35 <param name="cs219_ffindex" value="dbCAN-fam-V8/dbCAN-fam-V8_cs219.ffindex"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
36 <param name="cs219_ffdata" value="dbCAN-fam-V8/dbCAN-fam-V8_cs219.ffdata"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
37 <output name="output" file="6VYB_A.hhr" lines_diff="4"/>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
38 </test>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
39 </tests>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
40 <help><![CDATA[
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
41 HHsearch aligns a profile HMM against a database of target profile HMMs. The search first aligns the
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
42 query HMM with each of the target HMMs using the Viterbi dynamic programming algorithm, which finds the
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
43 alignment with the maximum score. The E-value for the target HMM is calculated from the Viterbi score.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
44 Target HMMs that reach sufficient significance to be reported are realigned using the Maximum Accuracy algorithm (MAC).
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
45 This algorithm maximizes the expected number of correctly aligned pairs of residues minus a penalty between 0 and 1.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
46 Values near 0 produce greedy, long, nearly global alignments, values above 0.3 result in shorter, local alignments.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
47
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
48 HHblits is an accelerated version of HHsearch that is fast enough to perform iterative searches through millions of profile HMMs,
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
49 e.g. through the Uniclust profile HMM databases, generated by clustering the UniProt database into clusters of globally alignable sequences.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
50 Analogously to PSI-BLAST and HMMER3, such iterative searches can be used to build MSAs by starting from a single query sequence.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
51 Sequences from matches to profile HMMs below some E-value threshold (e.g. 1e-3) are added to the query MSA for the next search iteration.
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
52
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
53 Download databases from: http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
54 ]]></help>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
55 <citations>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
56 <citation type="bibtex">
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
57 @article{soeding2005hhsearch,
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
58 author = {Soeding, Johannes},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
59 year = {2005},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
60 title = {Protein homology detection by HMM-HMM comparison},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
61 publisher = {Oxford University Press},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
62 journal = {Bioinformatics},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
63 url = {https://doi.org/10.1093/bioinformatics/bti125},
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
64 }</citation>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
65 </citations>
3e4d88784254 "planemo upload commit 3002402473e9f6dcdac5bfb4013cd2d81884a938-dirty"
guerler
parents:
diff changeset
66 </tool>