Mercurial > repos > iuc > mmseqs2_taxonomy_assignment
comparison mmseqs2_taxonomy_assignment.xml @ 0:d0acde079e2e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 1400593429eb4e9c6e307df3621825a8b84a6fa7
| author | iuc |
|---|---|
| date | Thu, 27 Mar 2025 14:38:20 +0000 |
| parents | |
| children | 876d26806584 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d0acde079e2e |
|---|---|
| 1 <tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description> | |
| 3 of sequences by comparing them to a reference database | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macro.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="biotools"/> | |
| 9 <expand macro="requirements"/> | |
| 10 <expand macro="version_command"/> | |
| 11 <command detect_errors="exit_code"><![CDATA[ | |
| 12 ## mmseqs createdb recognises gzip input by its .gz suffix, so compressed datasets must be linked under a .gz name. | |
| 13 #set $query_link = 'input.gz' if $createdb.input_fasta.ext.endswith('.gz') else 'input' | |
| 14 ln -s -f '${createdb.input_fasta}' '$query_link' && | |
| 15 mmseqs createdb '$query_link' 'sequenceDB' | |
| 16 --dbtype '$createdb.alph_type.dbtype' | |
| 17 --shuffle $createdb.shuffle && | |
| 18 ## Copy the files of the selected reference DB into the job working directory. | |
| 19 cp -r '${createtaxdb.database_type.mmseqs2_db_select.fields.path}'/database* . && | |
| 20 ## Add taxonomic labels to the copied DB; the mapping file is passed only when one was supplied. | |
| 21 mmseqs createtaxdb 'database' 'tmp' | |
| 22 #if $createtaxdb.tax_mapping_file | |
| 23 --tax-mapping-file '${createtaxdb.tax_mapping_file}' | |
| 24 #end if | |
| 25 --tax-mapping-mode '${createtaxdb.tax_mapping_mode}' | |
| 26 --threads "\${GALAXY_SLOTS:-1}" && | |
| 27 | |
| 28 | |
| 29 ## When taxa were requested, write a filtered copy of the reference DB to 'database_filtered'. | |
| 30 #if $filtertaxseqdb.taxon_list | |
| 31 mmseqs filtertaxseqdb 'database' 'database_filtered' | |
| 32 --taxon-list '${filtertaxseqdb.taxon_list}' && | |
| 33 #end if | |
| 34 | |
| 35 | |
| 36 | |
| 37 ## Assign taxonomy to 'sequenceDB' against the reference DB (the filtered copy when a taxon list was given). | |
| 38 mmseqs taxonomy | |
| 39 'sequenceDB' | |
| 40 #if $filtertaxseqdb.taxon_list | |
| 41 'database_filtered' | |
| 42 #else | |
| 43 'database' | |
| 44 #end if | |
| 45 'output_taxonomy' | |
| 46 'tmp' | |
| 47 #if str($createdb.alph_type.dbtype) == "1" | |
| 48 --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale | |
| 49 #elif str($createdb.alph_type.dbtype) == "2" | |
| 50 --zdrop $createdb.alph_type.zdrop | |
| 51 #end if | |
| 52 ##Pre-filter options | |
| 53 --add-self-matches $taxonomy.prefilter.add_self_matches | |
| 54 -s $taxonomy.prefilter.sensitivity | |
| 55 -k $taxonomy.prefilter.kmer_length | |
| 56 --target-search-mode $taxonomy.prefilter.target_search_mode | |
| 57 ##--k-score TWIN k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647] | |
| 58 --max-seqs $taxonomy.prefilter.max_seqs | |
| 59 --split $taxonomy.prefilter.split | |
| 60 --split-mode $taxonomy.prefilter.split_mode | |
| 61 ##--split-memory-limit BYTE Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0] | |
| 62 --diag-score $taxonomy.prefilter.diag_score | |
| 63 --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching | |
| 64 --mask $taxonomy.prefilter.mask | |
| 65 --mask-prob $taxonomy.prefilter.mask_prob | |
| 66 --mask-lower-case $taxonomy.prefilter.mask_lower_case | |
| 67 --min-ungapped-score $taxonomy.prefilter.min_ungapped_score | |
| 68 --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode | |
| 69 ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] | |
| 70 ##--local-tmp STR Path where some of the temporary files will be created [] | |
| 71 ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] | |
| 72 | |
| 73 ##Align options | |
| 74 -a $taxonomy.align.convertalis | |
| 75 ##The next 2 parameters seem to be the same | |
| 76 --alignment-mode $taxonomy.align.alignment_mode | |
| 77 --alignment-output-mode $taxonomy.align.alignment_output_mode | |
| 78 --wrapped-scoring $taxonomy.align.wrapped_scoring | |
| 79 -e $taxonomy.align.evalue | |
| 80 --min-seq-id $taxonomy.align.min_seq_id | |
| 81 --min-aln-len $taxonomy.align.min_aln_len | |
| 82 --seq-id-mode $taxonomy.align.seq_id_mode | |
| 83 --alt-ali $taxonomy.align.alt_ali | |
| 84 -c $taxonomy.align.cov | |
| 85 --cov-mode $taxonomy.align.cov_mode | |
| 86 --max-rejected $taxonomy.align.max_rejected | |
| 87 --max-accept $taxonomy.align.max_accept | |
| 88 --score-bias $taxonomy.align.score_bias | |
| 89 --realign $taxonomy.align.realign | |
| 90 --realign-score-bias $taxonomy.align.realign_score_bias | |
| 91 --realign-max-seqs $taxonomy.align.realign_max_seqs | |
| 92 --corr-score-weight $taxonomy.align.corr_score_weight | |
| 93 --exhaustive-search-filter $taxonomy.align.exhaustive_search_filter | |
| 94 ## --qid is a free-text field (single value or comma-separated list), so it is passed quoted. | |
| 95 ##Profile options | |
| 96 ##--pca Pseudo count admixture strength [] | |
| 97 ##--pcb Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) [] | |
| 98 --mask-profile $taxonomy.profile.mask_profile | |
| 99 --e-profile $taxonomy.profile.e_profile | |
| 100 --wg $taxonomy.profile.wg | |
| 101 --filter-msa $taxonomy.profile.filter_msa | |
| 102 --filter-min-enable $taxonomy.profile.filter_min_enable | |
| 103 --max-seq-id $taxonomy.profile.max_seq_id | |
| 104 --qid '$taxonomy.profile.qid' | |
| 105 --qsc $taxonomy.profile.qsc | |
| 106 --cov $taxonomy.profile.cov | |
| 107 --diff $taxonomy.profile.diff | |
| 108 --pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode | |
| 109 --exhaustive-search $taxonomy.profile.exhaustive_search | |
| 110 --lca-search $taxonomy.profile.lca_search | |
| 111 ## Free-text values are quoted; --blacklist and --taxon-list are emitted only when set, so an empty field cannot swallow the following flag. | |
| 112 ##Misc options | |
| 113 ##--orf-filter INT Prefilter query ORFs with non-selective search | |
| 114 ## Only used during nucleotide-vs-protein classification | |
| 115 ## NOTE: Consider disabling when classifying short reads [1] | |
| 116 --orf-filter-e $taxonomy.misc.orf_filter_e | |
| 117 --orf-filter-s $taxonomy.misc.orf_filter_s | |
| 118 --lca-mode $taxonomy.misc.lca_mode | |
| 119 --tax-output-mode $taxonomy.misc.tax_output_mode | |
| 120 --majority $taxonomy.misc.majority | |
| 121 --vote-mode $taxonomy.misc.vote_mode | |
| 122 ##--lca-ranks STR Add column with specified ranks (',' separated) [] | |
| 123 --tax-lineage $taxonomy.misc.tax_lineage | |
| 124 #if $taxonomy.misc.blacklist# --blacklist '$taxonomy.misc.blacklist' #end if# | |
| 125 #if $taxonomy.misc.taxon_list# --taxon-list '$taxonomy.misc.taxon_list' #end if# | |
| 126 --rescore-mode $taxonomy.misc.rescore_mode | |
| 127 --allow-deletion $taxonomy.misc.allow_deletion | |
| 128 --min-length $taxonomy.misc.min_length | |
| 129 --max-length $taxonomy.misc.max_length | |
| 130 --max-gaps $taxonomy.misc.max_gaps | |
| 131 --contig-start-mode $taxonomy.misc.contig_start_mode | |
| 132 --contig-end-mode $taxonomy.misc.contig_end_mode | |
| 133 --orf-start-mode $taxonomy.misc.orf_start_mode | |
| 134 --forward-frames '$taxonomy.misc.forward_frames' | |
| 135 --reverse-frames '$taxonomy.misc.reverse_frames' | |
| 136 --translation-table $taxonomy.misc.translation_table | |
| 137 --translate $taxonomy.misc.translate | |
| 138 --use-all-table-starts $taxonomy.misc.use_all_table_starts | |
| 139 --id-offset $taxonomy.misc.id_offset | |
| 140 --add-orf-stop $taxonomy.misc.add_orf_stop | |
| 141 --sequence-overlap $taxonomy.misc.sequence_overlap | |
| 142 --sequence-split-mode $taxonomy.misc.sequence_split_mode | |
| 143 --headers-split-mode $taxonomy.misc.headers_split_mode | |
| 144 --search-type $createtaxdb.database_type.search_type | |
| 145 --prefilter-mode $taxonomy.misc.prefilter_mode | |
| 146 | |
| 147 ##Common options | |
| 148 ##--compressed INT Write compressed output [0] | |
| 149 --threads "\${GALAXY_SLOTS:-1}" | |
| 150 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
| 151 --max-seq-len $taxonomy.common.max_seq_len | |
| 152 ##--db-load-mode INT Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0] | |
| 153 ##--mpi-runner STR Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") [] | |
| 154 ##--force-reuse BOOL Reuse tmp files in tmp/latest folder ignoring parameters and version changes [0] | |
| 155 ##--remove-tmp-files BOOL Delete temporary files [0] | |
| 156 | |
| 157 ##Expert options | |
| 158 --filter-hits $taxonomy.expert.filter_hits | |
| 159 --sort-results $taxonomy.expert.sort_results | |
| 160 ##--create-lookup INT Create database lookup file (can be very large) [0] | |
| 161 --chain-alignments $taxonomy.expert.chain_alignments | |
| 162 --merge-query $taxonomy.expert.merge_query | |
| 163 ##--strand INT Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1] | |
| 164 && | |
| 165 ## Export the taxonomy result as a TSV table; 'taxo_result.tsv' is the file collected as the tabular output. | |
| 166 mmseqs createtsv 'sequenceDB' 'output_taxonomy' 'taxo_result.tsv' | |
| 167 --first-seq-as-repr ${createtsv.first_seq_as_repr} | |
| 168 --target-column ${createtsv.target_column} | |
| 169 --full-header ${createtsv.full_header} | |
| 170 --idx-seq-src ${createtsv.idx_seq_src} | |
| 171 --threads "\${GALAXY_SLOTS:-1}" | |
| 172 | |
| 173 ## createtsv options that are not exposed by this wrapper: | |
| 174 ##--compressed INT Write compressed output [0] | |
| 175 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
| 176 ##--db-output BOOL Return a result DB instead of a text file [0] | |
| 177 | |
| 178 | |
| 179 ## Optional reports: both read 'output_taxonomy' together with the reference DB that was searched. | |
| 180 #set $report_db = 'database_filtered' if $filtertaxseqdb.taxon_list else 'database' | |
| 181 | |
| 182 ## Kraken-style text report (report mode 0), written to taxo_result.txt. | |
| 183 #if str($kraken_report.keep_report) == "Yes" | |
| 184 && | |
| 185 mmseqs taxonomyreport | |
| 186 '$report_db' | |
| 187 'output_taxonomy' | |
| 188 'taxo_result.txt' | |
| 189 --report-mode 0 | |
| 190 --threads "\${GALAXY_SLOTS:-1}" | |
| 191 #end if | |
| 192 | |
| 193 ## Krona-style HTML report (report mode 1), written to taxo_result.html. | |
| 194 #if str($krona_report.keep_report) == "Yes" | |
| 195 && | |
| 196 mmseqs taxonomyreport | |
| 197 '$report_db' | |
| 198 'output_taxonomy' | |
| 199 'taxo_result.html' | |
| 200 --report-mode 1 | |
| 201 --threads "\${GALAXY_SLOTS:-1}" | |
| 202 #end if | |
| 203 | |
| 204 | |
| 205 ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] | |
| 206 | |
| 207 ]]></command> | |
| 208 <inputs> | |
| 209 <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> | |
| 210 <!-- Query sequences; the label names every accepted format family, not only FASTA. --> <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/FASTQ file (optionally gzipped)" help="" /> | |
| 211 <conditional name="alph_type"> | |
| 212 <param argument="--dbtype" type="select" label="Input type" help="" > | |
| 213 <option value="0" selected="true">Auto</option> | |
| 214 <option value="1">Amino acid</option> | |
| 215 <option value="2">Nucleotides</option> | |
| 216 </param> | |
| 217 <when value="0"/> | |
| 218 <when value="1"> | |
| 219 <param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/> | |
| 220 </when> | |
| 221 <when value="2"> | |
| 222 <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> | |
| 223 </when> | |
| 224 </conditional> | |
| 225 <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> | |
| 226 </section> | |
| 227 <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> | |
| 228 <conditional name="database_type"> | |
| 229 <param name="type" type="select" label="Database type" help="" > | |
| 230 <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> | |
| 231 <option value="nucleotides_tax">Nucleotides with taxonomy information</option> | |
| 232 <option value="amino_acid">Amino acid without taxonomy information</option> | |
| 233 <option value="nucleotides">Nucleotides without taxonomy information</option> | |
| 234 </param> | |
| 235 <when value="amino_acid_tax"> | |
| 236 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
| 237 <options from_data_table="mmseqs2_databases"> | |
| 238 <filter type="static_value" value="aminoacid" column="type"/> | |
| 239 <filter type="static_value" value="yes" column="taxonomy"/> | |
| 240 <validator message="No mmseqs2 database is available" type="no_options"/> | |
| 241 </options> | |
| 242 </param> | |
| 243 <expand macro="search_type_aa" /> | |
| 244 </when> | |
| 245 <when value="nucleotides_tax"> | |
| 246 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
| 247 <options from_data_table="mmseqs2_databases"> | |
| 248 <filter type="static_value" value="nucleotide" column="type"/> | |
| 249 <filter type="static_value" value="yes" column="taxonomy"/> | |
| 250 <validator message="No mmseqs2 database is available" type="no_options"/> | |
| 251 </options> | |
| 252 </param> | |
| 253 <expand macro="search_type_nt" /> | |
| 254 </when> | |
| 255 <when value="amino_acid"> | |
| 256 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
| 257 <options from_data_table="mmseqs2_databases"> | |
| 258 <filter type="static_value" value="aminoacid" column="type"/> | |
| 259 <filter type="static_value" value="no" column="taxonomy"/> | |
| 260 <validator message="No mmseqs2 database is available" type="no_options"/> | |
| 261 </options> | |
| 262 </param> | |
| 263 <expand macro="search_type_aa" /> | |
| 264 </when> | |
| 265 <when value="nucleotides"> | |
| 266 <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> | |
| 267 <options from_data_table="mmseqs2_databases"> | |
| 268 <filter type="static_value" value="nucleotide" column="type"/> | |
| 269 <filter type="static_value" value="no" column="taxonomy"/> | |
| 270 <validator message="No mmseqs2 database is available" type="no_options"/> | |
| 271 </options> | |
| 272 </param> | |
| 273 <expand macro="search_type_nt" /> | |
| 274 </when> | |
| 275 </conditional> | |
| 276 <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/> | |
| 277 <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" > | |
| 278 <option value="0" selected="true">0: .lookup file</option> | |
| 279 <option value="1">1: .source file</option> | |
| 280 </param> | |
| 281 </section> | |
| 282 <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> | |
| 283 <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> | |
| 284 </section> | |
| 285 <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> | |
| 286 <section name="prefilter" title="Pre-filter"> | |
| 287 <expand macro="prefilter_common_parameters" /> | |
| 288 <param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help=""> | |
| 289 <option value="0">Use consecutive positions in k-mers</option> | |
| 290 <option value="1" selected="true">Use spaced k-mers</option> | |
| 291 </param> | |
| 292 <param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/> | |
| 293 <param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/> | |
| 294 <param argument="--target-search-mode" type="select" label="Target search mode" help="" > | |
| 295 <option value="0" selected="true">Regular k-mer</option> | |
| 296 <option value="1">Similar k-mer</option> | |
| 297 </param> | |
| 298 <param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/> | |
| 299 <param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/> | |
| 300 <param argument="--split-mode" type="select" label="Split mode" help="" > | |
| 301 <option value="0">Split target db</option> | |
| 302 <option value="1">Split query db</option> | |
| 303 <option value="2" selected="true">Auto, depending on main memory</option> | |
| 304 </param> | |
| 305 <param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/> | |
| 306 <param argument="--exact-kmer-matching" type="integer" min="0" max="1" value="0" label="Extract only exact k-mers for matching" help=""/> | |
| 307 </section> | |
| 308 <section name="align" title="Align"> | |
| 309 <expand macro="align_common_parameters" /> | |
| 310 <param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" > | |
| 311 <option value="0">Automatic</option> | |
| 312 <option value="1" selected="true">Only score and end_pos</option> | |
| 313 <option value="2">Also start_pos and cov</option> | |
| 314 <option value="3">Also seq.id</option> | |
| 315 <option value="4">Only ungapped alignment</option> | |
| 316 </param> | |
| 317 <param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/> | |
| 318 <param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/> | |
| 319 <!-- -c is a coverage fraction, so it is bounded to [0, 1]. --> <param argument="-c" name="cov" type="float" min="0" max="1" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/> | |
| 320 <param argument="--cov-mode" type="select" label="Coverage mode" help="" > | |
| 321 <option value="0" selected="true">Coverage of query and target</option> | |
| 322 <option value="1">Coverage of target</option> | |
| 323 <option value="2">Coverage of query</option> | |
| 324 <option value="3">Target seq. length has to be at least x% of query length</option> | |
| 325 <option value="4">Query seq. length has to be at least x% of target length</option> | |
| 326 <option value="5">Short seq. needs to be at least x% of the other seq. length</option> | |
| 327 </param> | |
| 328 <param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/> | |
| 329 <param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/> | |
| 330 <param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/> | |
| 331 </section> | |
| 332 <section name="profile" title="Profile"> | |
| 333 <param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/> | |
| 334 <!-- Upper E-value bound for sequence matches that are added to the profile. --> <param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequence matches with an E-value below this threshold into the profile" help=""/> | |
| 335 <param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/> | |
| 336 <param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/> | |
| 337 <param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/> | |
| 338 <param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/> | |
| 339 <!-- Free text because the value may be a single threshold or a comma-separated list. --> <param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds: | |
| 340 e.g. 0.15,0.30,0.50 defines filter buckets of ]0.15-0.30] and ]0.30-0.50]"/> | |
| 341 <param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/> | |
| 342 <param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/> | |
| 343 <param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/> | |
| 344 <param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" > | |
| 345 <option value="0" selected="true">Substitution-matrix</option> | |
| 346 <option value="1">Context-specific pseudocounts</option> | |
| 347 </param> | |
| 348 <param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/> | |
| 349 <param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/> | |
| 350 </section> | |
| 351 <section name="misc" title="Misc"> | |
| 352 <param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/> | |
| 353 <param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/> | |
| 354 <param argument="--lca-mode" type="select" label="LCA mode" help="" > | |
| 355 <option value="1">Single search LCA</option> | |
| 356 <option value="3" selected="true">Approximate 2bLCA</option> | |
| 357 <option value="4">Top hit</option> | |
| 358 </param> | |
| 359 <param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" > | |
| 360 <option value="0" selected="true">Output LCA</option> | |
| 361 <option value="1">Output alignment</option> | |
| 362 <option value="2">Output both</option> | |
| 363 </param> | |
| 364 <!-- --majority is a fraction of agreement, so it is bounded to [0, 1]. --> <param argument="--majority" type="float" min="0" max="1" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/> | |
| 365 <param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" > | |
| 366 <option value="0">Uniform</option> | |
| 367 <option value="1" selected="true">Minus log E-value</option> | |
| 368 <option value="2">Score</option> | |
| 369 </param> | |
| 370 <param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" > | |
| 371 <option value="0" selected="true">Don't show</option> | |
| 372 <option value="1">Add all lineage names</option> | |
| 373 <option value="2">Add all lineage taxids</option> | |
| 374 </param> | |
| 375 <param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/> | |
| 376 <param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/> | |
| 377 <param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" > | |
| 378 <option value="0" selected="true">Hamming distance</option> | |
| 379 <option value="1">Local alignment (score only)</option> | |
| 380 <option value="2">Local alignment</option> | |
| 381 <option value="3">Global alignment</option> | |
| 382 <option value="4">Longest alignment fulfilling window quality criterion</option> | |
| 383 </param> | |
| 384 <param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/> | |
| 385 <param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/> | |
| 386 <param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/> | |
| 387 <param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/> | |
| 388 <param argument="--contig-start-mode" type="select" label="Contig start can be" help="" > | |
| 389 <option value="0">Incomplete</option> | |
| 390 <option value="1">Complete</option> | |
| 391 <option value="2" selected="true">Both</option> | |
| 392 </param> | |
| 393 <param argument="--contig-end-mode" type="select" label="Contig end can be" help="" > | |
| 394 <option value="0">Incomplete</option> | |
| 395 <option value="1">Complete</option> | |
| 396 <option value="2" selected="true">Both</option> | |
| 397 </param> | |
| 398 <param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" > | |
| 399 <option value="0">From start to stop</option> | |
| 400 <option value="1" selected="true">From any to stop</option> | |
| 401 <option value="2">From last encountered start to stop (no start in the middle)</option> | |
| 402 </param> | |
| 403 <param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/> | |
| 404 <param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/> | |
| 405 <param argument="--translation-table" type="select" label="Translation table" help=""> | |
| 406 <option value="1" selected="true">Canonical</option> | |
| 407 <option value="2">The Vertebrate Mitochondrial Code</option> | |
| 408 <option value="3">The Yeast Mitochondrial Code</option> | |
| 409 <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
| 410 <option value="5">The Invertebrate Mitochondrial Code</option> | |
| 411 <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
| 412 <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option> | |
| 413 <option value="10">The Euplotid Nuclear Code</option> | |
| 414 <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option> | |
| 415 <option value="12">The Alternative Yeast Nuclear Code</option> | |
| 416 <option value="13">The Ascidian Mitochondrial Code</option> | |
| 417 <option value="14">The Alternative Flatworm Mitochondrial Code</option> | |
| 418 <option value="15">Blepharisma Nuclear Code</option> | |
| 419 <option value="16">Chlorophycean Mitochondrial Code</option> | |
| 420 <option value="21">Trematode Mitochondrial Code</option> | |
| 421 <option value="22">Scenedesmus obliquus Mitochondrial Code</option> | |
| 422 <option value="23">Thraustochytrium Mitochondrial Code</option> | |
| 423 <option value="24">Rhabdopleuridae Mitochondrial Code</option> | |
| 424 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> | |
| 425 <option value="26">Pachysolen tannophilus Nuclear Code</option> | |
| 426 <option value="27">Karyorelict Nuclear Code</option> | |
| 427 <option value="28">Condylostoma Nuclear Code</option> | |
| 428 <option value="29">Mesodinium Nuclear Code</option> | |
| 429 <option value="30">Peritrich Nuclear Code</option> | |
| 430 <option value="31">Blastocrithidia Nuclear Code</option> | |
| 431 </param> | |
| 432 <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> | |
| 433 <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> | |
| 434 <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> | |
| 435 <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/> | |
| 436 <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> | |
| 437 <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > | |
| 438 <option value="0">Copy data</option> | |
| 439 <option value="1" selected="true">Soft link data and write new index</option> | |
| 440 </param> | |
| 441 <param argument="--headers-split-mode" type="select" label="Headers split mode" help="" > | |
| 442 <option value="0" selected="true">Split position</option> | |
| 443 <option value="1">Original header</option> | |
| 444 </param> | |
| 445 <param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" > | |
| 446 <option value="0" selected="true">Kmer/ungapped</option> | |
| 447 <option value="1">Ungapped</option> | |
| 448 <option value="2">No filter</option> | |
| 449 </param> | |
| 450 </section> | |
| 451 <expand macro="common_section"/> | |
| 452 <section name="expert" title="Expert"> | |
| 453 <expand macro="expert_common_parameters" /> | |
| 454 <param argument="--chain-alignments" type="integer" min="0" value="0" label="Chain alignments" help=""/> | |
| 455 <param argument="--merge-query" type="integer" min="0" value="1" label="Combine ORFs/split sequences to a single entry" help=""/> | |
| 456 </section> | |
| 457 </section> | |
| 458 <section name="createtsv" title="Create a tsv report from taxonomy output "> | |
| 459 <param argument="--first-seq-as-repr" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use the first sequence of the clustering result as representative sequence" help=""/> | |
| 460 <param argument="--target-column" type="integer" min="0" value="1" label="Select a target column, 0 if no target id exists" help="" /> | |
| 461 <param argument="--full-header" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Replace DB ID by its corresponding Full Header" help=""/> | |
| 462 <param argument="--idx-seq-src" type="select" label="Index sequences source" help=""> | |
| 463 <option value="0" selected="true">Auto</option> | |
| 464 <option value="1">Split/translated sequences</option> | |
| 465 <option value="2">Input sequences</option> | |
| 466 </param> | |
| 467 </section> | |
| 468 <conditional name="kraken_report"> | |
| 469 <param name="keep_report" type="select" label="Do you want a Kraken style report" help="" > | |
| 470 <option value="Yes" selected="true">Yes</option> | |
| 471 <option value="No">No</option> | |
| 472 </param> | |
| 473 <when value="Yes"/> | |
| 474 <when value="No"/> | |
| 475 </conditional> | |
| 476 <conditional name="krona_report"> | |
| 477 <param name="keep_report" type="select" label="Do you want a Krona style report" help="" > | |
| 478 <option value="Yes" selected="true">Yes</option> | |
| 479 <option value="No">No</option> | |
| 480 </param> | |
| 481 <when value="Yes"/> | |
| 482 <when value="No"/> | |
| 483 </conditional> | |
| 484 </inputs> | |
| 485 <outputs> <!-- The TSV is always produced; each report is collected only when its "keep_report" select is "Yes". --> | |
| 486 <data name="output_taxonomy_tsv" label="${tool.name} on ${on_string}: Taxonomy Report" format="tabular" from_work_dir="taxo_result.tsv"/> | |
| 487 <data name="output_taxonomy_kraken" label="${tool.name} on ${on_string}: Kraken Report" format="txt" from_work_dir="taxo_result.txt"> | |
| 488 <filter>kraken_report['keep_report'] == "Yes"</filter> | |
| 489 </data> | |
| 490 <data name="output_taxonomy_krona" label="${tool.name} on ${on_string}: Krona Report" format="html" from_work_dir="taxo_result.html"> | |
| 491 <filter>krona_report['keep_report'] == "Yes"</filter> | |
| 492 </data> | |
| 493 </outputs> | |
| 494 <tests> | |
| 495 <!-- Test with Kraken report only: nucleotide query, reference DB filtered to taxon 2, Krona report disabled --> | |
| 496 <test expect_num_outputs="2"> | |
| 497 <section name="createdb"> | |
| 498 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
| 499 <conditional name="alph_type"> | |
| 500 <param name="dbtype" value="2"/> | |
| 501 </conditional> | |
| 502 </section> | |
| 503 <section name="createtaxdb"> | |
| 504 <conditional name="database_type"> | |
| 505 <param name="type" value="amino_acid_tax"/> | |
| 506 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
| 507 </conditional> | |
| 508 </section> | |
| 509 <section name="filtertaxseqdb"> | |
| 510 <param name="taxon_list" value="2" /> | |
| 511 </section> | |
| 512 <conditional name="krona_report"> | |
| 513 <param name="keep_report" value="No"/> | |
| 514 </conditional> | |
| 515 <output name="output_taxonomy_tsv" ftype="tabular"> | |
| 516 <assert_contents> | |
| 517 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
| 518 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
| 519 <has_n_columns n="8"/> | |
| 520 </assert_contents> | |
| 521 </output> | |
| 522 <output name="output_taxonomy_kraken" ftype="txt"> | |
| 523 <assert_contents> | |
| 524 <has_text text="93.3333"/> | |
| 525 <has_text text="33.3333"/> | |
| 526 </assert_contents> | |
| 527 </output> | |
| 528 </test> | |
| 529 <!-- Test 2: Krona report kept, Kraken report disabled; no taxon_list filter and no explicit dbtype are given. Expects two outputs: the taxonomy table and the Krona HTML report. --> <test expect_num_outputs="2"> | |
| 530 <section name="createdb"> | |
| 531 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
| 532 </section> | |
| 533 <section name="createtaxdb"> | |
| 534 <conditional name="database_type"> | |
| 535 <param name="type" value="amino_acid_tax"/> | |
| 536 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
| 537 </conditional> | |
| 538 </section> | |
| 539 <conditional name="kraken_report"> | |
| 540 <param name="keep_report" value="No"/> | |
| 541 </conditional> | |
| 542 <output name="output_taxonomy_tsv" ftype="tabular"> | |
| 543 <assert_contents> | |
| 544 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
| 545 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
| 546 <has_n_columns n="8"/> | |
| 547 </assert_contents> | |
| 548 </output> | |
| 549 <output name="output_taxonomy_krona" ftype="html"> | |
| 550 <assert_contents> | |
| 551 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | |
| 552 </assert_contents> | |
| 553 </output> | |
| 554 </test> | |
| 555 <!-- Test 3: neither report toggle is overridden, so both keep their "Yes" default. Expects three outputs: the taxonomy table, the Krona HTML report and the Kraken report. --> <test expect_num_outputs="3"> | |
| 556 <section name="createdb"> | |
| 557 <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> | |
| 558 </section> | |
| 559 <section name="createtaxdb"> | |
| 560 <conditional name="database_type"> | |
| 561 <param name="type" value="amino_acid_tax"/> | |
| 562 <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> | |
| 563 </conditional> | |
| 564 </section> | |
| 565 <output name="output_taxonomy_tsv" ftype="tabular"> | |
| 566 <assert_contents> | |
| 567 <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> | |
| 568 <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> | |
| 569 <has_n_columns n="8"/> | |
| 570 </assert_contents> | |
| 571 </output> | |
| 572 <output name="output_taxonomy_krona" ftype="html"> | |
| 573 <assert_contents> | |
| 574 <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> | |
| 575 </assert_contents> | |
| 576 </output> | |
| 577 <output name="output_taxonomy_kraken" ftype="txt"> | |
| 578 <assert_contents> | |
| 579 <has_text text="93.3333"/> | |
| 580 <has_text text="33.3333"/> | |
| 581 </assert_contents> | |
| 582 </output> | |
| 583 </test> | |
| 584 </tests> | |
| 585 <help><![CDATA[ | |
| 586 **MMseqs2: ultra fast and sensitive sequence search and clustering suite** | |
| 587 | |
| 588 MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets. | |
| 589 MMseqs2 is open-source, GPL-licensed software implemented in C++ for Linux, macOS, and (as a beta version, via Cygwin) Windows. | |
| 590 The software is designed to run on multiple cores and servers and exhibits very good scalability. | |
| 591 MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. | |
| 592 It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed. | |
| 593 | |
| 594 **Usage** | |
| 595 | |
| 596 * Convert FASTA/Q file(s) to MMseqs sequence DB format | |
| 597 *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* | |
| 598 | |
| 599 * Add taxonomic labels to sequence DB | |
| 600 *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]* | |
| 601 | |
| 602 * Filter taxonomy sequence database | |
| 603 *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* | |
| 604 | |
| 605 * Taxonomy assignment by computing the lowest common ancestor of homologs | |
| 606 *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* | |
| 607 | |
| 608 * Convert result DB to tab-separated flat file | |
| 609 *mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]* | |
| 610 | |
| 611 * Create a taxonomy report in Kraken or Krona format | |
| 612 *mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]* | |
| 613 | |
| 614 https://github.com/soedinglab/MMseqs2 | |
| 615 | |
| 616 ]]></help> | |
| 617 <!-- Expands the "citations" macro; macro.xml is the only macro file this tool imports. --> <expand macro="citations"/> | |
| 618 </tool> |
