view qiime2-2020.8/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 26:60da5215e182 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 12:58:02 +0000
parents d93d8888f0b0
children
line wrap: on
line source

<?xml version="1.0" ?>
<!-- Galaxy wrapper around the QIIME 2 "feature-classifier classify-hybrid-vsearch-sklearn" pipeline. -->
<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn"
      name="qiime feature-classifier classify-hybrid-vsearch-sklearn"
      version="2020.8">
  <description> ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
  <!-- Package that has to be available for the command below to run. -->
  <requirements>
    <requirement version="2020.8" type="package">qiime2</requirement>
  </requirements>
  <!--
    Cheetah command template.
    * Dataset paths are single-quoted so unusual characters in a path cannot break the shell line.
    * Optional options are emitted only when the user supplied a value, so QIIME 2 falls back
      to its own defaults otherwise.
    * The thread count comes from the GALAXY_SLOTS environment variable set by the job runner
      (falling back to 1); there is no user-facing threads parameter.
    * The result is written to a fixed name in the working directory and then copied onto the
      Galaxy output dataset; the copy only happens when qiime exits successfully.
  -->
  <command><![CDATA[
qiime feature-classifier classify-hybrid-vsearch-sklearn

--i-query='$iquery'

--i-reference-reads='$ireferencereads'

--i-reference-taxonomy='$ireferencetaxonomy'

--i-classifier='$iclassifier'

## Free-text fields: accept a positive integer or the keyword 'all'.
## 'None' is still tolerated so values saved by the former select widgets are skipped.
#if str($pmaxaccepts).strip() not in ('', 'None'):
--p-maxaccepts='$pmaxaccepts'
#end if

#if str($ppercidentity) != '':
--p-perc-identity=$ppercidentity
#end if

#if str($pquerycov) != '':
--p-query-cov=$pquerycov
#end if

#if str($pstrand) != 'None':
--p-strand=$pstrand
#end if

#if str($pminconsensus) != '':
--p-min-consensus=$pminconsensus
#end if

#if str($pmaxhits).strip() not in ('', 'None'):
--p-maxhits='$pmaxhits'
#end if

#if str($pmaxrejects).strip() not in ('', 'None'):
--p-maxrejects='$pmaxrejects'
#end if

#if str($preadsperbatch) != '':
--p-reads-per-batch=$preadsperbatch
#end if

## Number in [0, 1] or the keyword 'disable'.
#if str($pconfidence).strip() not in ('', 'None'):
--p-confidence='$pconfidence'
#end if

#if str($preadorientation) != 'None':
--p-read-orientation=$preadorientation
#end if

--p-threads="\${GALAXY_SLOTS:-1}"

#if $pnoprefilter:
--p-no-prefilter
#end if

#if str($psamplesize) != '':
--p-sample-size=$psamplesize
#end if

#if str($prandseed) != '':
--p-randseed=$prandseed
#end if

--o-classification=oclassification

&& cp oclassification.qza '$oclassification'

  ]]></command>
  <!--
    User-facing parameters. Each "name" is referenced from the command template above.
    Value-or-keyword options (integer or 'all', number or 'disable') are text fields guarded
    by a regex validator, because a select list cannot represent an open numeric range.
  -->
  <inputs>
    <!-- Required input artifacts. -->
    <param name="iquery" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-query: ARTIFACT FeatureData[Sequence]"
           help="Sequences to classify taxonomically. [required]" />
    <param name="ireferencereads" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-reference-reads: ARTIFACT FeatureData[Sequence]"
           help="Reference sequences. [required]" />
    <param name="ireferencetaxonomy" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy]"
           help="Reference taxonomy labels. [required]" />
    <param name="iclassifier" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-classifier: ARTIFACT TaxonomicClassifier"
           help="Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" />

    <!-- VSEARCH search limits. -->
    <param name="pmaxaccepts" type="text" optional="True" value=""
           label="--p-maxaccepts: INTEGER Range(1, None) or 'all'"
           help="Maximum number of hits to keep for each query. Set to 'all' to keep all hits above the perc_identity similarity. Leave empty to use the QIIME 2 default.">
      <validator type="regex" message="Enter a positive integer or 'all', or leave the field empty.">^([1-9][0-9]*|all)?$</validator>
    </param>
    <param name="ppercidentity" type="float" optional="True" value="0.5" min="0.0" max="1.0"
           label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True)"
           help="Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" />
    <param name="pquerycov" type="float" optional="True" value="0.8" min="0.0" max="1.0"
           label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True)"
           help="Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" />
    <param name="pstrand" type="select" optional="True"
           label="--p-strand: "
           help="Align against reference sequences in forward ('plus') or both directions ('both').">
      <option selected="True" value="None">Selection is Optional</option>
      <option value="both">both</option>
      <option value="plus">plus</option>
    </param>
    <param name="pminconsensus" type="float" optional="True" value="0.51" min="0.5" max="1.0"
           label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True)"
           help="Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]">
      <!-- The lower bound is exclusive: exactly 0.5 is not a valid value. -->
      <validator type="in_range" min="0.5" max="1.0" exclude_min="true"
                 message="Value must be greater than 0.5 and at most 1.0." />
    </param>
    <param name="pmaxhits" type="text" optional="True" value=""
           label="--p-maxhits: INTEGER Range(1, None) or 'all'"
           help="Positive integer, or 'all'. Leave empty to use the QIIME 2 default.">
      <validator type="regex" message="Enter a positive integer or 'all', or leave the field empty.">^([1-9][0-9]*|all)?$</validator>
    </param>
    <param name="pmaxrejects" type="text" optional="True" value=""
           label="--p-maxrejects: INTEGER Range(1, None) or 'all'"
           help="Positive integer, or 'all'. This option works in pair with maxaccepts. Leave empty to use the QIIME 2 default.">
      <validator type="regex" message="Enter a positive integer or 'all', or leave the field empty.">^([1-9][0-9]*|all)?$</validator>
    </param>

    <!-- sklearn classification settings. -->
    <param name="preadsperbatch" type="integer" optional="True" min="0"
           label="--p-reads-per-batch: INTEGER Range(0, None)"
           help="Number of reads to process in each batch for sklearn classification. Leave empty to use the QIIME 2 default." />
    <param name="pconfidence" type="text" optional="True" value=""
           label="--p-confidence: NUMBER Range(0, 1, inclusive_end=True) or 'disable'"
           help="Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. Leave empty to use the QIIME 2 default.">
      <validator type="regex" message="Enter a number between 0 and 1 or 'disable', or leave the field empty.">^(0(\.[0-9]+)?|\.[0-9]+|1(\.0+)?|disable)?$</validator>
    </param>
    <param name="preadorientation" type="select" optional="True"
           label="--p-read-orientation: "
           help="Direction of reads with respect to reference sequences in pre-trained sklearn classifier. 'same' classifies reads unchanged; 'reverse-complement' reverses and complements reads prior to classification; 'auto' autodetects orientation based on the confidence estimates for the first 100 reads.">
      <option selected="True" value="None">Selection is Optional</option>
      <option value="same">same</option>
      <option value="reverse-complement">reverse-complement</option>
      <option value="auto">auto</option>
    </param>

    <!-- Prefilter settings. Ticking the box passes the flag and thereby switches the prefilter off. -->
    <param name="pnoprefilter" type="boolean" checked="false"
           label="--p-no-prefilter: Disable the positive filter of query sequences"
           help="The prefilter is enabled by default; tick this box to switch it off." />
    <param name="psamplesize" type="integer" optional="True" value="1000" min="1"
           label="--p-sample-size: INTEGER Range(1, None)"
           help="Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" />
    <param name="prandseed" type="integer" optional="True" value="0" min="0"
           label="--p-randseed: INTEGER Range(0, None)"
           help="Use integer as a seed for the pseudo-random generator used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" />
  </inputs>

  <!-- Single result dataset; the command copies the produced artifact onto it. -->
  <outputs>
    <data name="oclassification" format="qza" label="${tool.name} on ${on_string}: classification.qza" />
  </outputs>

  <help><![CDATA[
ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
#################################################################

NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
classifier. First performs rough positive filter to remove artifact and
low-coverage sequences (use "prefilter" parameter to toggle this step on or
off). Second, performs VSEARCH exact match between query and
reference_reads to find exact matches, followed by least common ancestor
consensus taxonomy assignment from among maxaccepts top hits, min_consensus
of which share that taxonomic assignment. Query sequences without an exact
match are then classified with a pre-trained sklearn taxonomy classifier to
predict the most likely taxonomic lineage.

Parameters
----------
query : FeatureData[Sequence]
    Sequences to classify taxonomically.
reference_reads : FeatureData[Sequence]
    reference sequences.
reference_taxonomy : FeatureData[Taxonomy]
    reference taxonomy labels.
classifier : TaxonomicClassifier
    Pre-trained sklearn taxonomic classifier for classifying the reads.
maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of hits to keep for each query. Set to "all" to keep all
    hits > perc_identity similarity. Note that if strand=both, maxaccepts
    will keep N hits for each direction (if searches in the opposite
    direction yield results that exceed the minimum perc_identity). In
    those cases use maxhits to control the total number of hits returned.
    This option works in pair with maxrejects. The search process sorts
    target sequences by decreasing number of k-mers they have in common
    with the query sequence, using that information as a proxy for sequence
    similarity. After pairwise alignments, if the first target sequence
    passes the acceptation criteria, it is accepted as best hit and the
    search process stops for that query. If maxaccepts is set to a higher
    value, more hits are accepted. If maxaccepts and maxrejects are both
    set to "all", the complete database is searched.
perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Percent sequence similarity to use for PREFILTER. Reject match if
    percent identity to query is lower. Set to a lower value to perform a
    rough pre-filter. This parameter is ignored if `prefilter` is disabled.
query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Query coverage threshold to use for PREFILTER. Reject match if query
    alignment coverage per high-scoring pair is lower. Set to a lower value
    to perform a rough pre-filter. This parameter is ignored if `prefilter`
    is disabled.
strand : Str % Choices('both', 'plus'), optional
    Align against reference sequences in forward ("plus") or both
    directions ("both").
min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
    Minimum fraction of assignments must match top hit to be accepted as
    consensus assignment.
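maxhits : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of hits to show once the search is terminated.
maxrejects : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of non-matching target sequences to consider before
    stopping the search. This option works in pair with maxaccepts (see
    the maxaccepts description for details).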
reads_per_batch : Int % Range(0, None), optional
    Number of reads to process in each batch for sklearn classification. If
    "auto", this parameter is autoscaled to min(number of query sequences /
    threads, 20000).
confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional
    Confidence threshold for limiting taxonomic depth. Set to "disable" to
    disable confidence calculation, or 0 to calculate confidence but not
    apply it to limit the taxonomic depth of the assignments.
read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional
    Direction of reads with respect to reference sequences in pre-trained
    sklearn classifier. same will cause reads to be classified unchanged;
    reverse-complement will cause reads to be reversed and complemented
    prior to classification. "auto" will autodetect orientation based on
    the confidence estimates for the first 100 reads.
threads : Int % Range(1, None), optional
    Number of threads to use for job parallelization.
prefilter : Bool, optional
    Toggle positive filter of query sequences on or off.
sample_size : Int % Range(1, None), optional
    Randomly extract the given number of sequences from the reference
    database to use for prefiltering. This parameter is ignored if
    `prefilter` is disabled.
randseed : Int % Range(0, None), optional
    Use integer as a seed for the pseudo-random generator used during
    prefiltering. A given seed always produces the same output, which is
    useful for replicability. Set to 0 to use a pseudo-random seed. This
    parameter is ignored if `prefilter` is disabled.

Returns
-------
classification : FeatureData[Taxonomy]
    The resulting taxonomy classifications.
  ]]></help>
  <!-- The citation block is defined in a shared macro file and expanded here. -->
  <macros>
    <import>qiime_citation.xml</import>
  </macros>
  <expand macro="qiime_citation" />
</tool>