Mercurial > repos > q2d2 > qiime2__feature_classifier__classify_hybrid_vsearch_sklearn
changeset 0:bafd0cd558db draft
planemo upload for repository https://github.com/qiime2/galaxy-tools/tree/main/tools/suite_qiime2__feature_classifier commit 9023cfd83495a517fbcbb6f91d5b01a6f1afcda1
author | q2d2 |
---|---|
date | Mon, 29 Aug 2022 19:51:25 +0000 |
parents | |
children | 4d9059e67f87 |
files | qiime2__feature_classifier__classify_hybrid_vsearch_sklearn.xml test-data/.gitkeep |
diffstat | 1 files changed, 156 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime2__feature_classifier__classify_hybrid_vsearch_sklearn.xml	Mon Aug 29 19:51:25 2022 +0000
@@ -0,0 +1,156 @@
+<?xml version='1.0' encoding='utf-8'?>
+<!--
+Copyright (c) 2022, QIIME 2 development team.
+
+Distributed under the terms of the Modified BSD License. (SPDX: BSD-3-Clause)
+-->
+<!--
+This tool was automatically generated by:
+    q2galaxy (version: 2022.8.1)
+for:
+    qiime2 (version: 2022.8.1)
+-->
+<tool name="qiime2 feature-classifier classify-hybrid-vsearch-sklearn" id="qiime2__feature_classifier__classify_hybrid_vsearch_sklearn" version="2022.8.0+q2galaxy.2022.8.1.2" profile="22.05" license="BSD-3-Clause">
+    <description>ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
+    <requirements>
+        <container type="docker">quay.io/qiime2/core:2022.8</container>
+    </requirements>
+    <version_command>q2galaxy version feature_classifier</version_command>
+    <command detect_errors="aggressive">q2galaxy run feature_classifier classify_hybrid_vsearch_sklearn '$inputs'</command>
+    <configfiles>
+        <inputs name="inputs" data_style="paths"/>
+    </configfiles>
+    <inputs>
+        <param name="query" type="data" format="qza" label="query: FeatureData[Sequence]" help="[required]  Query Sequences.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Sequence]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Sequence]']</validator>
+        </param>
+        <param name="reference_reads" type="data" format="qza" label="reference_reads: FeatureData[Sequence]" help="[required]  Reference sequences.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Sequence]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Sequence]']</validator>
+        </param>
+        <param name="reference_taxonomy" type="data" format="qza" label="reference_taxonomy: FeatureData[Taxonomy]" help="[required]  Reference taxonomy labels.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="FeatureData[Taxonomy]"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['FeatureData[Taxonomy]']</validator>
+        </param>
+        <param name="classifier" type="data" format="qza" label="classifier: TaxonomicClassifier" help="[required]  Pre-trained sklearn taxonomic classifier for classifying the reads.">
+            <options options_filter_attribute="metadata.semantic_type">
+                <filter type="add_value" value="TaxonomicClassifier"/>
+            </options>
+            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['TaxonomicClassifier']</validator>
+        </param>
+        <section name="__q2galaxy__GUI__section__extra_opts__" title="Click here for additional options">
+            <conditional name="__q2galaxy__GUI__conditional__maxaccepts__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="maxaccepts: Int % Range(1, None) | Str % Choices('all')" help="[default: 10]  Maximum number of hits to keep for each query. Set to &quot;all&quot; to keep all hits &gt; perc_identity similarity. Note that if strand=both, maxaccepts will keep N hits for each direction (if searches in the opposite direction yield results that exceed the minimum perc_identity). In those cases use maxhits to control the total number of hits returned. This option works in pair with maxrejects. The search process sorts target sequences by decreasing number of k-mers they have in common with the query sequence, using that information as a proxy for sequence similarity. After pairwise alignments, if the first target sequence passes the acceptation criteria, it is accepted as best hit and the search process stops for that query. If maxaccepts is set to a higher value, more hits are accepted. If maxaccepts and maxrejects are both set to &quot;all&quot;, the complete database is searched.">
+                    <option value="all">all (Str)</option>
+                    <option value="__q2galaxy__::control::Int X Range(1__comma__ None)" selected="true">Provide a value (Int % Range(1, None))</option>
+                </param>
+                <when value="all">
+                    <param name="maxaccepts" type="hidden" value="all"/>
+                </when>
+                <when value="__q2galaxy__::control::Int X Range(1__comma__ None)">
+                    <param name="maxaccepts" type="integer" min="1" value="10" label="maxaccepts: Int % Range(1, None)" help="[default: 10]  Maximum number of hits to keep for each query. Set to &quot;all&quot; to keep all hits &gt; perc_identity similarity. Note that if strand=both, maxaccepts will keep N hits for each direction (if searches in the opposite direction yield results that exceed the minimum perc_identity). In those cases use maxhits to control the total number of hits returned. This option works in pair with maxrejects. The search process sorts target sequences by decreasing number of k-mers they have in common with the query sequence, using that information as a proxy for sequence similarity. After pairwise alignments, if the first target sequence passes the acceptation criteria, it is accepted as best hit and the search process stops for that query. If maxaccepts is set to a higher value, more hits are accepted. If maxaccepts and maxrejects are both set to &quot;all&quot;, the complete database is searched."/>
+                </when>
+            </conditional>
+            <param name="perc_identity" type="float" min="0.0" max="1.0" value="0.5" label="perc_identity: Float % Range(0.0, 1.0, inclusive_end=True)" help="[default: 0.5]  Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled."/>
+            <param name="query_cov" type="float" min="0.0" max="1.0" value="0.8" label="query_cov: Float % Range(0.0, 1.0, inclusive_end=True)" help="[default: 0.8]  Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled."/>
+            <param name="strand" type="select" label="strand: Str % Choices('both', 'plus')" display="radio">
+                <option value="both" selected="true">both</option>
+                <option value="plus">plus</option>
+            </param>
+            <param name="min_consensus" type="float" min="0.500001" max="1.0" value="0.51" label="min_consensus: Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True)" help="[default: 0.51]  Minimum fraction of assignments must match top hit to be accepted as consensus assignment."/>
+            <conditional name="__q2galaxy__GUI__conditional__maxhits__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="maxhits: Int % Range(1, None) | Str % Choices('all')" help="[default: 'all']">
+                    <option value="all" selected="true">all (Str)</option>
+                    <option value="__q2galaxy__::control::Int X Range(1__comma__ None)">Provide a value (Int % Range(1, None))</option>
+                </param>
+                <when value="all">
+                    <param name="maxhits" type="hidden" value="all"/>
+                </when>
+                <when value="__q2galaxy__::control::Int X Range(1__comma__ None)">
+                    <param name="maxhits" type="integer" min="1" value="" label="maxhits: Int % Range(1, None)" help="[required]"/>
+                </when>
+            </conditional>
+            <conditional name="__q2galaxy__GUI__conditional__maxrejects__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="maxrejects: Int % Range(1, None) | Str % Choices('all')" help="[default: 'all']">
+                    <option value="all" selected="true">all (Str)</option>
+                    <option value="__q2galaxy__::control::Int X Range(1__comma__ None)">Provide a value (Int % Range(1, None))</option>
+                </param>
+                <when value="all">
+                    <param name="maxrejects" type="hidden" value="all"/>
+                </when>
+                <when value="__q2galaxy__::control::Int X Range(1__comma__ None)">
+                    <param name="maxrejects" type="integer" min="1" value="" label="maxrejects: Int % Range(1, None)" help="[required]"/>
+                </when>
+            </conditional>
+            <conditional name="__q2galaxy__GUI__conditional__reads_per_batch__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="reads_per_batch: Int % Range(1, None) | Str % Choices('auto')" help="[default: 'auto']  Number of reads to process in each batch for sklearn classification. If &quot;auto&quot;, this parameter is autoscaled to min(number of query sequences / threads, 20000).">
+                    <option value="auto" selected="true">auto (Str)</option>
+                    <option value="__q2galaxy__::control::Int X Range(1__comma__ None)">Provide a value (Int % Range(1, None))</option>
+                </param>
+                <when value="auto">
+                    <param name="reads_per_batch" type="hidden" value="auto"/>
+                </when>
+                <when value="__q2galaxy__::control::Int X Range(1__comma__ None)">
+                    <param name="reads_per_batch" type="integer" min="1" value="" label="reads_per_batch: Int % Range(1, None)" help="[required]  Number of reads to process in each batch for sklearn classification. If &quot;auto&quot;, this parameter is autoscaled to min(number of query sequences / threads, 20000)."/>
+                </when>
+            </conditional>
+            <conditional name="__q2galaxy__GUI__conditional__confidence__">
+                <param name="__q2galaxy__GUI__select__" type="select" label="confidence: Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable')" help="[default: 0.7]  Confidence threshold for limiting taxonomic depth. Set to &quot;disable&quot; to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments.">
+                    <option value="disable">disable (Str)</option>
+                    <option value="__q2galaxy__::control::Float X Range(0__comma__ 1__comma__ inclusive_end=True)" selected="true">Provide a value (Float % Range(0, 1, inclusive_end=True))</option>
+                </param>
+                <when value="disable">
+                    <param name="confidence" type="hidden" value="disable"/>
+                </when>
+                <when value="__q2galaxy__::control::Float X Range(0__comma__ 1__comma__ inclusive_end=True)">
+                    <param name="confidence" type="float" min="0" max="1" value="0.7" label="confidence: Float % Range(0, 1, inclusive_end=True)" help="[default: 0.7]  Confidence threshold for limiting taxonomic depth. Set to &quot;disable&quot; to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments."/>
+                </when>
+            </conditional>
+            <param name="read_orientation" type="select" label="read_orientation: Str % Choices('same', 'reverse-complement', 'auto')" display="radio">
+                <option value="same">same</option>
+                <option value="reverse-complement">reverse-complement</option>
+                <option value="auto" selected="true">auto</option>
+            </param>
+            <param name="threads" type="integer" min="1" value="1" label="threads: Int % Range(1, None)" help="[default: 1]  Number of threads to use for job parallelization."/>
+            <param name="prefilter" type="boolean" truevalue="__q2galaxy__::literal::True" falsevalue="__q2galaxy__::literal::False" checked="true" label="prefilter: Bool" help="[default: Yes]  Toggle positive filter of query sequences on or off."/>
+            <param name="sample_size" type="integer" min="1" value="1000" label="sample_size: Int % Range(1, None)" help="[default: 1000]  Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled."/>
+            <param name="randseed" type="integer" min="0" value="0" label="randseed: Int % Range(0, None)" help="[default: 0]  Use integer as a seed for the pseudo-random generator used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled."/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="classification" format="qza" label="${tool.name} on ${on_string}: classification.qza" from_work_dir="classification.qza"/>
+    </outputs>
+    <tests/>
+    <help>
+QIIME 2: feature-classifier classify-hybrid-vsearch-sklearn
+===========================================================
+ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
+
+
+Outputs:
+--------
+:classification.qza: Taxonomy classifications of query sequences.
+
+|
+
+Description:
+------------
+NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to https://forum.qiime2.org!
+Assign taxonomy to query sequences using hybrid classifier. First performs rough positive filter to remove artifact and low-coverage sequences (use "prefilter" parameter to toggle this step on or off). Second, performs VSEARCH exact match between query and reference_reads to find exact matches, followed by least common ancestor consensus taxonomy assignment from among maxaccepts top hits, min_consensus of which share that taxonomic assignment. Query sequences without an exact match are then classified with a pre-trained sklearn taxonomy classifier to predict the most likely taxonomic lineage.
+
+
+|
+
+</help>
+    <citations>
+        <citation type="doi">10.1186/s40168-018-0470-z</citation>
+        <citation type="doi">10.1038/s41587-019-0209-9</citation>
+    </citations>
+</tool>