comparison metaeuk_easy_linclust.xml @ 0:b11620b9577a draft default tip

Uploaded
author dnbenso
date Tue, 23 Nov 2021 02:47:16 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b11620b9577a
1 <tool id="metaeuk_easy_linclust" name="MetaEuk Easy Linclust" version="@TOOL_VERSION@+galaxy0">
2 <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
3 <xrefs>
4 <xref type="bio.tools">MetaEuk</xref>
5 </xrefs>
6 <macros>
7 <token name="@TOOL_VERSION@">5.34c21f2</token>
8 </macros>
9 <requirements>
10 <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement>
11 </requirements>
12 <!-- Optional numeric parameters are guarded with str(...) != '' rather than plain truthiness, so an explicit 0 (e.g. -v 0 or -e 0) is still passed to metaeuk; an unset optional parameter renders as an empty string and is omitted. --><command detect_errors="aggressive"><![CDATA[
13 metaeuk easy-linclust
14 '$contigs'
15 result
16 "./tmp"
17 --cov-mode '${cov_mode}'
18 -c '${abovematch}'
19 --threads \${GALAXY_SLOTS:-2}
20 #if $adv.adv_options == "yes":
21 #if str($adv.alignment_mode) != '':
22 --alignment-mode '${adv.alignment_mode}'
23 #end if
24 #if str($adv.belowmatch) != '':
25 -e '${adv.belowmatch}'
26 #end if
27 #if str($adv.min_seq_id) != '':
28 --min-seq-id '${adv.min_seq_id}'
29 #end if
30 #if str($adv.min_aln_len) != '':
31 --min-aln-len '${adv.min_aln_len}'
32 #end if
33 #if str($adv.seq_id_mode) != '':
34 --seq-id-mode '${adv.seq_id_mode}'
35 #end if
36 #if str($adv.cluster_mode) != '':
37 --cluster-mode '${adv.cluster_mode}'
38 #end if
39 #if str($adv.kmer_per_seq) != '':
40 --kmer-per-seq '${adv.kmer_per_seq}'
41 #end if
42 #if $adv.kmer_per_seq_scale:
43 --kmer-per-seq-scale '${adv.kmer_per_seq_scale}'
44 #end if
45 #if str($adv.verbosity) != '':
46 -v '${adv.verbosity}'
47 #end if
48 #end if
49 ]]></command>
50 <inputs> <!-- Numeric ranges mirror the option ranges listed in the help section; alignment-mode accepts 0-4. -->
51 <param name="contigs" type="data" format="fasta" label="Contigs to cluster" />
52 <param argument="--cov-mode" name="cov_mode" type="integer" min="0" max="5" value="0" label="Integer between 0 and 5 - see below for details" />
53 <param argument="-c" name="abovematch" type="float" min="0" max="1" value="0.800" label="list matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]" />
54 <conditional name="adv">
55 <param type="select" name="adv_options" label="Show advanced options">
56 <option value="yes">Yes</option>
57 <option value="no" selected="true">No</option>
58 </param>
59 <when value="yes">
60 <param argument="--alignment-mode" name="alignment_mode" type="integer" optional="true" min="0" max="4" label="Integer between 0 and 4 - see below for details" />
61 <param argument="-e" name="belowmatch" type="float" optional="true" min="0" label="List matches below this E-value (range 0.0-inf) [0.001]" />
62 <param argument="--min-seq-id" name="min_seq_id" type="float" optional="true" min="0" max="1" label="List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]" />
63 <param argument="--min-aln-len" name="min_aln_len" type="integer" optional="true" min="0" label="Minimum alignment length (range 0-INT_MAX) [0]" />
64 <param argument="--seq-id-mode" name="seq_id_mode" type="integer" optional="true" min="0" max="2" label="0: alignment length 1: shorter, 2: longer sequence [0]" />
65 <param argument="--cluster-mode" name="cluster_mode" type="integer" optional="true" min="0" max="3" label="Integer between 0 and 3 - see below for details" />
66 <param argument="--kmer-per-seq" name="kmer_per_seq" type="integer" optional="true" min="1" label="k-mers per sequence [21]" />
67 <param argument="--kmer-per-seq-scale" name="kmer_per_seq_scale" type="text" optional="true" label="Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen" help="e.g. [nucl:0.200,aa:0.000]" />
68 <param argument="-v" name="verbosity" type="integer" optional="true" min="0" max="3" label="Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]" />
69 </when>
70 <when value="no">
71 </when>
72 </conditional>
73 </inputs>
74 <outputs> <!-- Each dataset is collected from the file that easy-linclust writes using the "result" prefix given on the command line. -->
75 <data format="fasta" label="Representatives" name="rep_seq" from_work_dir="result_rep_seq.fasta" />
76 <data format="fasta" label="FASTA-like per cluster" name="all_seq" from_work_dir="result_all_seqs.fasta" />
77 <data format="tabular" label="Adjacency list" name="cluster" from_work_dir="result_cluster.tsv" />
78 </outputs>
79 <tests> <!-- Default (non-advanced) run; expect_num_outputs pins the three declared outputs so a silently missing or extra dataset fails the test. -->
80 <test expect_num_outputs="3">
81 <param name="contigs" ftype="fasta" value="DB.fasta" />
82 <param name="cov_mode" value="1" />
83 <param name="abovematch" value="0.85" />
84 <output name="rep_seq" ftype="fasta" value="result_rep_seq.fasta" />
85 <output name="all_seq" ftype="fasta" value="result_all_seqs.fasta" />
86 <output name="cluster" ftype="tabular" value="result_cluster.tsv" />
87 </test>
88 </tests>
89 <help><![CDATA[
90 **MetaEuk**
91
92 `MetaEuk`_ is a modular toolkit designed for large-scale gene discovery and annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and sensitive homology search capabilities of MMseqs2_ with a dynamic programming procedure to recover optimal exon sets. It reduces redundancies in multiple discoveries of the same gene and resolves conflicting gene predictions on the same strand.
93
94 This tool runs the easy-linclust command from metaeuk, which clusters the sequences of a FASTA file. Input is the FASTA file of sequences you want to cluster. Output is a FASTA file of the cluster representatives, a FASTA-like file listing the member sequences of each cluster, and a tab-separated adjacency list of the clusters. Sequences predicted by metaeuk easy-predict have their exons annotated in the header according to the metaeuk header format_.
95
96 ----
97
98 **easy-linclust**
99
100 This tool implements the `easy-linclust`_ command from `MMseqs2`_, which clusters sequences in linear time. It is orders of magnitude faster, but slightly less sensitive, than regular clustering.
101
102 ----
103
104 **Running metaeuk easy-linclust**
105
106 metaeuk easy-linclust <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]> <o:clusterPrefix> <tmpDir> [options]
107
108 * **--alignment-mode INT** How to compute the alignment:
109 | 0: automatic
110 | 1: only score and end_pos
111 | 2: also start_pos and cov
112 | 3: also seq.id
113 | 4: only ungapped alignment [0]
114
115 * **-e FLOAT** List matches below this E-value (range 0.0-inf) [0.001]
116
117 * **--min-seq-id FLOAT** List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]
118
119 * **--min-aln-len INT** Minimum alignment length (range 0-INT_MAX) [0]
120
121 * **--seq-id-mode INT** 0: alignment length 1: shorter, 2: longer sequence [0]
122
123 * **-c FLOAT** List matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]
124
125 * **--cov-mode INT**
126 | 0: coverage of query and target
127 | 1: coverage of target
128 | 2: coverage of query
129 | 3: target seq. length has to be at least x% of query length
130 | 4: query seq. length has to be at least x% of target length
131 | 5: short seq. needs to be at least x% of the other seq. length [0]
132
133 * **--cluster-mode INT**
134 | 0: Set-Cover (greedy)
135 | 1: Connected component (BLASTclust)
136 | 2,3: Greedy clustering by sequence length (CDHIT) [0]
137
138 * **--kmer-per-seq INT** k-mers per sequence [21]
139
140 * **--kmer-per-seq-scale TWIN** Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen [nucl:0.200,aa:0.000]
141
142 * **--threads INT** Number of CPU-cores used (all by default) [20]
143
144 * **--compressed INT** Write compressed output [0]
145
146 * **-v INT** Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
147
148
149 .. _MetaEuk: https://github.com/soedinglab/metaeuk
150
151 .. _MMseqs2: https://github.com/soedinglab/MMseqs2
152
153 .. _easy-linclust: https://github.com/soedinglab/mmseqs2/wiki#linclust
154
155 .. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header
156
157 ]]></help>
158 <citations>
159 <citation type="doi">10.1186/s40168-020-00808-x</citation>
160 </citations>
161 </tool>