annotate metaeuk_easy_linclust.xml @ 0:b11620b9577a draft default tip

Uploaded
author dnbenso
date Tue, 23 Nov 2021 02:47:16 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
1 <tool id="metaeuk_easy_linclust" name="MetaEuk Easy Linclust" version="@TOOL_VERSION@+galaxy0">
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
2 <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
3 <xrefs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
4 <xref type="bio.tools">MetaEuk</xref>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
5 </xrefs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
6 <macros>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
7 <token name="@TOOL_VERSION@">5.34c21f2</token>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
8 </macros>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
9 <requirements>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
10 <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
11 </requirements>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
12 <command detect_errors="aggressive"><![CDATA[
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
13 metaeuk easy-linclust
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
14 '$contigs'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
15 result
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
16 "./tmp"
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
17 --cov-mode '${cov_mode}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
18 -c '${abovematch}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
19 --threads \${GALAXY_SLOTS:-2}
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
20 #if $adv.adv_options == "yes":
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
21 #if $adv.alignment_mode:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
22 --alignment-mode '${adv.alignment_mode}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
23 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
24 #if str($adv.belowmatch) != '':
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
25 -e '${adv.belowmatch}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
26 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
27 #if $adv.min_seq_id:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
28 --min-seq-id '${adv.min_seq_id}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
29 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
30 #if $adv.min_aln_len:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
31 --min-aln-len '${adv.min_aln_len}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
32 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
33 #if $adv.seq_id_mode:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
34 --seq-id-mode '${adv.seq_id_mode}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
35 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
36 #if $adv.cluster_mode:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
37 --cluster-mode '${adv.cluster_mode}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
38 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
39 #if $adv.kmer_per_seq:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
40 --kmer-per-seq '${adv.kmer_per_seq}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
41 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
42 #if $adv.kmer_per_seq_scale:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
43 --kmer-per-seq-scale '${adv.kmer_per_seq_scale}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
44 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
45 #if str($adv.verbosity) != '':
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
46 -v '${adv.verbosity}'
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
47 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
48 #end if
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
49 ]]></command>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
50 <inputs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
51 <param name="contigs" type="data" format="fasta" label="Contigs to cluster" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
52 <param argument="--cov-mode" name="cov_mode" type="integer" min="0" max="5" value="0" label="Integer between 0 and 5 - see below for details" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
53 <param argument="-c" name="abovematch" type="float" min="0" max="1" value="0.800" label="list matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
54 <conditional name="adv">
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
55 <param type="select" name="adv_options" label="Show advanced options">
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
56 <option value="yes">Yes</option>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
57 <option value="no" selected="true">No</option>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
58 </param>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
59 <when value="yes">
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
60 <param argument="--alignment-mode" name="alignment_mode" type="integer" optional="true" min="0" max="4" label="Integer between 0 and 4 - see below for details" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
61 <param argument="-e" name="belowmatch" type="float" optional="true" min="0" label="List matches below this E-value (range 0.0-inf) [0.001]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
62 <param argument="--min-seq-id" name="min_seq_id" type="float" optional="true" min="0" max="1" label="List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
63 <param argument="--min-aln-len" name="min_aln_len" type="integer" optional="true" min="0" label="Minimum alignment length (range 0-INT_MAX) [0]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
64 <param argument="--seq-id-mode" name="seq_id_mode" type="integer" optional="true" min="0" max="2" label="0: alignment length 1: shorter, 2: longer sequence [0]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
65 <param argument="--cluster-mode" name="cluster_mode" type="integer" optional="true" min="0" max="3" label="Integer between 0 and 3 - see below for details" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
66 <param argument="--kmer-per-seq" name="kmer_per_seq" type="integer" optional="true" min="1" label="k-mers per sequence [21]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
67 <param argument="--kmer-per-seq-scale" name="kmer_per_seq_scale" type="text" optional="true" label="Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen" help="e.g. [nucl:0.200,aa:0.000]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
68 <param argument="-v" name="verbosity" type="integer" optional="true" min="0" max="3" label="Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
69 </when>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
70 <when value="no">
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
71 </when>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
72 </conditional>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
73 </inputs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
74 <outputs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
75 <data format="fasta" label="Representatives" name="rep_seq" from_work_dir="result_rep_seq.fasta" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
76 <data format="fasta" label="FASTA-like per cluster" name="all_seq" from_work_dir="result_all_seqs.fasta" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
77 <data format="tabular" label="Adjacency list" name="cluster" from_work_dir="result_cluster.tsv" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
78 </outputs>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
79 <tests>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
80 <test>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
81 <param name="contigs" ftype="fasta" value="DB.fasta" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
82 <param name="cov_mode" value="1" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
83 <param name="abovematch" value="0.85" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
84 <output name="rep_seq" ftype="fasta" value="result_rep_seq.fasta" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
85 <output name="all_seq" ftype="fasta" value="result_all_seqs.fasta" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
86 <output name="cluster" ftype="tabular" value="result_cluster.tsv" />
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
87 </test>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
88 </tests>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
89 <help><![CDATA[
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
90 **MetaEuk**
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
91
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
92 `MetaEuk`_ is a modular toolkit designed for large-scale gene discovery and annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and sensitive homology search capabilities of MMseqs2_ with a dynamic programming procedure to recover optimal exon sets. It reduces redundancies in multiple discoveries of the same gene and resolves conflicting gene predictions on the same strand.
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
93
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
94 MetaEuk's main workflow is easy-predict, which combines metaeuk modules into a pipeline for protein alignment prediction: its input is the contigs you want to search for protein hits and the proteins you want to search against those contigs, and its output is FASTA-format predicted ORFs, with exons annotated in the header according to the metaeuk header format_. This Galaxy tool does not run easy-predict; it wraps the easy-linclust clustering command described below.
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
95
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
96 ----
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
97
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
98 **easy-linclust**
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
99
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
100 This tool implements the `easy-linclust`_ command from `MMseqs2`_, which clusters sequences in linear time. It is orders of magnitude faster, but slightly less sensitive, than the standard MMseqs2 clustering workflow.
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
101
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
102 ----
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
103
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
104 **Running metaeuk easy-linclust**
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
105
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
106 metaeuk easy-linclust <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]> <o:clusterPrefix> <tmpDir> [options]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
107
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
108 * **--alignment-mode INT** How to compute the alignment:
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
109 | 0: automatic
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
110 | 1: only score and end_pos
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
111 | 2: also start_pos and cov
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
112 | 3: also seq.id
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
113 | 4: only ungapped alignment [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
114
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
115 * **-e FLOAT** List matches below this E-value (range 0.0-inf) [0.001]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
116
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
117 * **--min-seq-id FLOAT** List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
118
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
119 * **--min-aln-len INT** Minimum alignment length (range 0-INT_MAX) [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
120
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
121 * **--seq-id-mode INT** 0: alignment length, 1: shorter, 2: longer sequence [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
122
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
123 * **-c FLOAT** List matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
124
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
125 * **--cov-mode INT**
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
126 | 0: coverage of query and target
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
127 | 1: coverage of target
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
128 | 2: coverage of query
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
129 | 3: target seq. length has to be at least x% of query length
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
130 | 4: query seq. length has to be at least x% of target length
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
131 | 5: short seq. needs to be at least x% of the other seq. length [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
132
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
133 * **--cluster-mode INT**
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
134 | 0: Set-Cover (greedy)
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
135 | 1: Connected component (BLASTclust)
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
136 | 2,3: Greedy clustering by sequence length (CDHIT) [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
137
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
138 * **--kmer-per-seq INT** k-mers per sequence [21]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
139
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
140 * **--kmer-per-seq-scale TWIN** Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen [nucl:0.200,aa:0.000]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
141
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
142 * **--threads INT** Number of CPU-cores used (all by default) [20]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
143
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
144 * **--compressed INT** Write compressed output [0]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
145
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
146 * **-v INT** Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
147
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
148
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
149 .. _MetaEuk: https://github.com/soedinglab/metaeuk
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
150
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
151 .. _MMseqs2: https://github.com/soedinglab/MMseqs2
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
152
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
153 .. _easy-linclust: https://github.com/soedinglab/mmseqs2/wiki#linclust
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
154
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
155 .. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
156
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
157 ]]></help>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
158 <citations>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
159 <citation type="doi">10.1186/s40168-020-00808-x</citation>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
160 </citations>
b11620b9577a Uploaded
dnbenso
parents:
diff changeset
161 </tool>