Mercurial > repos > florianbegusch > qiime2_suite
view qiime2-2020.8/qiime_feature-classifier_classify-hybrid-vsearch-sklearn.xml @ 26:60da5215e182 draft
Uploaded
author | florianbegusch |
---|---|
date | Fri, 04 Sep 2020 12:58:02 +0000 |
parents | d93d8888f0b0 |
children |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy wrapper for the QIIME 2 action
  "feature-classifier classify-hybrid-vsearch-sklearn" (ALPHA pipeline).

  Inputs : query sequences, reference reads, reference taxonomy and a
           pre-trained sklearn classifier (all QIIME 2 artifacts).
  Output : one FeatureData[Taxonomy] artifact (classification.qza).

  Notes for maintainers:
    * Optional parameters left empty are omitted from the command line so
      that QIIME 2 applies its own defaults.
    * The thread count is not a user input; it is taken from the slots
      Galaxy allocates to the job (GALAXY_SLOTS, falling back to 1).
    * The QIIME 2 "examples" flag is intentionally not exposed: it prints
      usage examples and exits without producing an output artifact.
-->
<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn" version="2020.8">
  <description>ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>

  <macros>
    <import>qiime_citation.xml</import>
  </macros>

  <requirements>
    <requirement type="package" version="2020.8">qiime2</requirement>
  </requirements>

  <!--
    Cheetah template. Every optional value is guarded against both the empty
    string (cleared field) and the literal 'None' (unset optional / legacy
    select value) before it is emitted. The final copy is chained with a
    logical AND so a QIIME 2 failure is reported as the job's exit status.
  -->
  <command><![CDATA[
qiime feature-classifier classify-hybrid-vsearch-sklearn

--i-query='$iquery'
--i-reference-reads='$ireferencereads'
--i-reference-taxonomy='$ireferencetaxonomy'
--i-classifier='$iclassifier'

#if str($pmaxaccepts) not in ('', 'None')
  --p-maxaccepts='$pmaxaccepts'
#end if

#if str($ppercidentity) not in ('', 'None')
  --p-perc-identity=$ppercidentity
#end if

#if str($pquerycov) not in ('', 'None')
  --p-query-cov=$pquerycov
#end if

#if str($pstrand) not in ('', 'None')
  --p-strand=$pstrand
#end if

#if str($pminconsensus) not in ('', 'None')
  --p-min-consensus=$pminconsensus
#end if

#if str($pmaxhits) not in ('', 'None')
  --p-maxhits='$pmaxhits'
#end if

#if str($pmaxrejects) not in ('', 'None')
  --p-maxrejects='$pmaxrejects'
#end if

#if str($preadsperbatch) not in ('', 'None')
  --p-reads-per-batch=$preadsperbatch
#end if

#if str($pconfidence) not in ('', 'None')
  --p-confidence='$pconfidence'
#end if

#if str($preadorientation) not in ('', 'None')
  --p-read-orientation=$preadorientation
#end if

--p-threads=\${GALAXY_SLOTS:-1}

#if $pnoprefilter
  --p-no-prefilter
#end if

#if str($psamplesize) not in ('', 'None')
  --p-sample-size=$psamplesize
#end if

#if str($prandseed) not in ('', 'None')
  --p-randseed=$prandseed
#end if

--o-classification=oclassification

&& cp oclassification.qza '$oclassification'
  ]]></command>

  <inputs>
    <!-- Required input artifacts -->
    <param name="iquery" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" />
    <param name="ireferencereads" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" />
    <param name="ireferencetaxonomy" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels. [required]" />
    <param name="iclassifier" type="data" format="qza,no_unzip.zip" optional="False"
           label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" />

    <!--
      Union-typed parameters (integer or the keyword 'all'). They are free-text
      fields with a regex validator; 'None' is still accepted so values saved
      by the previous select-based form are treated as "not set".
    -->
    <param name="pmaxaccepts" type="text" optional="True" value=""
           label="--p-maxaccepts: INTEGER Range(1, None) or 'all'. Maximum number of hits to keep for each query. Set to 'all' to keep all hits &gt; perc_identity similarity. [optional]">
      <validator type="regex" message="Enter an integer of at least 1, or 'all'.">^(None|all|[1-9][0-9]*)?$</validator>
    </param>

    <param name="ppercidentity" type="float" optional="True" value="0.5" min="0.0" max="1.0"
           label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" />

    <param name="pquerycov" type="float" optional="True" value="0.8" min="0.0" max="1.0"
           label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" />

    <param name="pstrand" type="select" optional="True"
           label="--p-strand: Align against reference sequences in forward ('plus') or both directions ('both'). [optional]">
      <option selected="True" value="None">Selection is Optional</option>
      <option value="both">both</option>
      <option value="plus">plus</option>
    </param>

    <!-- The lower bound 0.5 is exclusive, which needs an in_range validator. -->
    <param name="pminconsensus" type="float" optional="True" value="0.51" min="0.5" max="1.0"
           label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]">
      <validator type="in_range" min="0.5" max="1.0" exclude_min="true"
                 message="Value must be greater than 0.5 and at most 1.0." />
    </param>

    <param name="pmaxhits" type="text" optional="True" value=""
           label="--p-maxhits: INTEGER Range(1, None) or 'all'. [optional]">
      <validator type="regex" message="Enter an integer of at least 1, or 'all'.">^(None|all|[1-9][0-9]*)?$</validator>
    </param>

    <param name="pmaxrejects" type="text" optional="True" value=""
           label="--p-maxrejects: INTEGER Range(1, None) or 'all'. [optional]">
      <validator type="regex" message="Enter an integer of at least 1, or 'all'.">^(None|all|[1-9][0-9]*)?$</validator>
    </param>

    <param name="preadsperbatch" type="integer" optional="True" min="0"
           label="--p-reads-per-batch: INTEGER Range(0, None) Number of reads to process in each batch for sklearn classification. [optional]" />

    <!-- Float in [0, 1] or the keyword 'disable'. -->
    <param name="pconfidence" type="text" optional="True" value=""
           label="--p-confidence: NUMBER Range(0, 1, inclusive_end=True) or 'disable'. Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. [optional]">
      <validator type="regex" message="Enter a number between 0 and 1, or 'disable'.">^(None|disable|0(\.[0-9]+)?|\.[0-9]+|1(\.0+)?)?$</validator>
    </param>

    <param name="preadorientation" type="select" optional="True"
           label="--p-read-orientation: Direction of reads with respect to reference sequences in pre-trained sklearn classifier. [optional]">
      <option selected="True" value="None">Selection is Optional</option>
      <option value="same">same</option>
      <option value="reverse-complement">reverse-complement</option>
      <option value="auto">auto</option>
    </param>

    <param name="pnoprefilter" type="boolean" checked="false"
           label="--p-no-prefilter: Disable the positive filter of query sequences. [default: prefilter enabled]" />

    <param name="psamplesize" type="integer" optional="True" value="1000" min="1"
           label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" />

    <param name="prandseed" type="integer" optional="True" value="0" min="0"
           label="--p-randseed: INTEGER Range(0, None) Use integer as a seed for the pseudo-random generator used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" />
  </inputs>

  <outputs>
    <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification" />
  </outputs>

  <help><![CDATA[
ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
#################################################################

NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
classifier. First performs rough positive filter to remove artifact and
low-coverage sequences (use "prefilter" parameter to toggle this step on or
off). Second, performs VSEARCH exact match between query and
reference_reads to find exact matches, followed by least common ancestor
consensus taxonomy assignment from among maxaccepts top hits, min_consensus
of which share that taxonomic assignment. Query sequences without an exact
match are then classified with a pre-trained sklearn taxonomy classifier to
predict the most likely taxonomic lineage.

Parameters
----------

query : FeatureData[Sequence]
    Sequences to classify taxonomically.

reference_reads : FeatureData[Sequence]
    reference sequences.

reference_taxonomy : FeatureData[Taxonomy]
    reference taxonomy labels.

classifier : TaxonomicClassifier
    Pre-trained sklearn taxonomic classifier for classifying the reads.

maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of hits to keep for each query. Set to "all" to keep all
    hits > perc_identity similarity. Note that if strand=both, maxaccepts
    will keep N hits for each direction (if searches in the opposite
    direction yield results that exceed the minimum perc_identity). In
    those cases use maxhits to control the total number of hits returned.
    This option works in pair with maxrejects. The search process sorts
    target sequences by decreasing number of k-mers they have in common
    with the query sequence, using that information as a proxy for sequence
    similarity. After pairwise alignments, if the first target sequence
    passes the acceptation criteria, it is accepted as best hit and the
    search process stops for that query. If maxaccepts is set to a higher
    value, more hits are accepted. If maxaccepts and maxrejects are both
    set to "all", the complete database is searched.

perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Percent sequence similarity to use for PREFILTER. Reject match if
    percent identity to query is lower. Set to a lower value to perform a
    rough pre-filter. This parameter is ignored if `prefilter` is disabled.

query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Query coverage threshold to use for PREFILTER. Reject match if query
    alignment coverage per high-scoring pair is lower. Set to a lower value
    to perform a rough pre-filter. This parameter is ignored if `prefilter`
    is disabled.

strand : Str % Choices('both', 'plus'), optional
    Align against reference sequences in forward ("plus") or both
    directions ("both").

min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
    Minimum fraction of assignments must match top hit to be accepted as
    consensus assignment.

maxhits : Int % Range(1, None) | Str % Choices('all'), optional

maxrejects : Int % Range(1, None) | Str % Choices('all'), optional

reads_per_batch : Int % Range(0, None), optional
    Number of reads to process in each batch for sklearn classification. If
    "auto", this parameter is autoscaled to min(number of query sequences /
    threads, 20000).

confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional
    Confidence threshold for limiting taxonomic depth. Set to "disable" to
    disable confidence calculation, or 0 to calculate confidence but not
    apply it to limit the taxonomic depth of the assignments.

read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional
    Direction of reads with respect to reference sequences in pre-trained
    sklearn classifier. same will cause reads to be classified unchanged;
    reverse-complement will cause reads to be reversed and complemented
    prior to classification. "auto" will autodetect orientation based on
    the confidence estimates for the first 100 reads.

threads : Int % Range(1, None), optional
    Number of threads to use for job parallelization. In this Galaxy tool
    the value is taken from the number of slots allocated to the job.

prefilter : Bool, optional
    Toggle positive filter of query sequences on or off.

sample_size : Int % Range(1, None), optional
    Randomly extract the given number of sequences from the reference
    database to use for prefiltering. This parameter is ignored if
    `prefilter` is disabled.

randseed : Int % Range(0, None), optional
    Use integer as a seed for the pseudo-random generator used during
    prefiltering. A given seed always produces the same output, which is
    useful for replicability. Set to 0 to use a pseudo-random seed. This
    parameter is ignored if `prefilter` is disabled.

Returns
-------

classification : FeatureData[Taxonomy]
    The resulting taxonomy classifications.
  ]]></help>

  <expand macro="qiime_citation"/>
</tool>