view qiime2__dada2__denoise_ccs.xml @ 0:8860fd51e176 draft

planemo upload for repository https://github.com/qiime2/galaxy-tools/tree/main/tools/suite_qiime2__dada2 commit 9023cfd83495a517fbcbb6f91d5b01a6f1afcda1
author q2d2
date Mon, 29 Aug 2022 19:24:12 +0000
parents
children 9977f9d23166
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<!--
Copyright (c) 2022, QIIME 2 development team.

Distributed under the terms of the Modified BSD License. (SPDX: BSD-3-Clause)
-->
<!--
This tool was automatically generated by:
    q2galaxy (version: 2022.8.1)
for:
    qiime2 (version: 2022.8.1)
-->
<tool name="qiime2 dada2 denoise-ccs" id="qiime2__dada2__denoise_ccs" version="2022.8.0+q2galaxy.2022.8.1.2" profile="22.05" license="BSD-3-Clause">
    <description>Denoise and dereplicate single-end Pacbio CCS</description>
    <!-- Execution environment: a Docker container built from the image quay.io/qiime2/core, tag 2022.8. -->
    <requirements>
        <container type="docker">quay.io/qiime2/core:2022.8</container>
    </requirements>
    <!-- Version probe: asks q2galaxy for the version of the dada2 plugin. -->
    <version_command>q2galaxy version dada2</version_command>
    <!--
    Job command: runs the denoise_ccs action of the dada2 plugin through q2galaxy.
    '$inputs' is substituted with the config file named "inputs" declared under configfiles in this tool.
    NOTE(review): detect_errors="aggressive" presumably makes Galaxy fail the job on error-like output
    as well as on a non-zero exit code; confirm against the Galaxy tool schema.
    -->
    <command detect_errors="aggressive">q2galaxy run dada2 denoise_ccs '$inputs'</command>
    <!--
    Declares the config file named "inputs"; the command line passes it to q2galaxy as '$inputs'.
    NOTE(review): data_style="paths" presumably makes Galaxy write dataset parameters as file paths
    in that file; confirm against the Galaxy tool schema.
    -->
    <configfiles>
        <inputs name="inputs" data_style="paths"/>
    </configfiles>
    <inputs>
        <!--
        Required input artifact (QZA). Both the options filter and the validator expression restrict
        the selectable datasets to the semantic type SampleData[SequencesWithQuality].
        -->
        <param
            name="demultiplexed_seqs"
            type="data"
            format="qza"
            label="demultiplexed_seqs: SampleData[SequencesWithQuality]"
            help="[required]  The single-end demultiplexed PacBio CCS sequences to be denoised.">
            <options options_filter_attribute="metadata.semantic_type">
                <filter type="add_value" value="SampleData[SequencesWithQuality]"/>
            </options>
            <!-- The expression below is evaluated as written; keep it on a single line. -->
            <validator type="expression" message="Incompatible type">hasattr(value.metadata, "semantic_type") and value.metadata.semantic_type in ['SampleData[SequencesWithQuality]']</validator>
        </param>
        <!--
        Required 5' primer/adapter sequence, entered as free text.
        The sanitizer accepts every printable character; the validator rejects an empty value.
        -->
        <param
            name="front"
            type="text"
            label="front: Str"
            help="[required]  Sequence of an adapter ligated to the 5' end. The adapter and any preceding bases are trimmed. Can contain IUPAC ambiguous nucleotide codes. Note, primer direction is 5' to 3'. Primers are removed before trim and filter step. Reads that do not contain the primer are discarded. Each read is re-oriented if the reverse complement of the read is a better match to the provided primer sequence. This is recommended for PacBio CCS reads, which come in a random mix of forward and reverse-complement orientations.">
            <sanitizer>
                <valid initial="string.printable"/>
            </sanitizer>
            <validator type="expression" message="Please verify this parameter.">value is not None and len(value) &gt; 0</validator>
        </param>
        <!--
        Required 3' primer/adapter sequence, entered as free text.
        The sanitizer accepts every printable character; the validator rejects an empty value.
        The help text states that bases following a 3' adapter are removed; the earlier wording
        ("preceding bases") described 5' adapter behaviour instead.
        -->
        <param
            name="adapter"
            type="text"
            label="adapter: Str"
            help="[required]  Sequence of an adapter ligated to the 3' end. The adapter and any subsequent bases are trimmed. Can contain IUPAC ambiguous nucleotide codes. Note, primer direction is 5' to 3'. Primers are removed before trim and filter step. Reads that do not contain the primer are discarded.">
            <sanitizer>
                <valid initial="string.printable"/>
            </sanitizer>
            <validator type="expression" message="Please verify this parameter.">value is not None and len(value) &gt; 0</validator>
        </param>
        <!--
        Optional parameters; every one of them carries a default value.
        Numeric parameters declare a `min` bound so that negative lengths, counts and scores
        (and a parent fold-change below 1) are rejected in the form instead of failing after the job starts.
        Boolean parameters serialize to the q2galaxy literal markers given in truevalue/falsevalue.
        -->
        <section name="__q2galaxy__GUI__section__extra_opts__" title="Click here for additional options">
            <!-- Primer matching -->
            <param
                name="max_mismatch"
                type="integer"
                value="2"
                min="0"
                label="max_mismatch: Int"
                help="[default: 2]  The number of mismatches to tolerate when matching reads to primer sequences - see http://benjjneb.github.io/dada2/ for complete details."/>
            <param
                name="indels"
                type="boolean"
                truevalue="__q2galaxy__::literal::True"
                falsevalue="__q2galaxy__::literal::False"
                label="indels: Bool"
                help="[default: No]  Allow insertions or deletions of bases when matching adapters. Note that primer matching can be significantly slower, currently about 4x slower"/>

            <!-- Trimming, truncation and filtering; 0 is the "disabled" sentinel for trunc_len and max_len -->
            <param
                name="trunc_len"
                type="integer"
                value="0"
                min="0"
                label="trunc_len: Int"
                help="[default: 0]  Position at which sequences should be truncated due to decrease in quality. This truncates the 3' end of the input sequences, which will be the bases that were sequenced in the last cycles. Reads that are shorter than this value will be discarded. If 0 is provided, no truncation or length filtering will be performed. Note: Since PacBio CCS sequences normally have very high quality scores, there is no need to truncate the PacBio CCS sequences."/>
            <param
                name="trim_left"
                type="integer"
                value="0"
                min="0"
                label="trim_left: Int"
                help="[default: 0]  Position at which sequences should be trimmed due to low quality. This trims the 5' end of the input sequences, which will be the bases that were sequenced in the first cycles."/>
            <param
                name="max_ee"
                type="float"
                value="2.0"
                min="0"
                label="max_ee: Float"
                help="[default: 2.0]  Reads with number of expected errors higher than this value will be discarded."/>
            <param
                name="trunc_q"
                type="integer"
                value="2"
                min="0"
                label="trunc_q: Int"
                help="[default: 2]  Reads are truncated at the first instance of a quality score less than or equal to this value. If the resulting read is then shorter than `trunc_len`, it is discarded."/>
            <param
                name="min_len"
                type="integer"
                value="20"
                min="0"
                label="min_len: Int"
                help="[default: 20]  Remove reads with length less than minLen. minLen is enforced after trimming and truncation. For 16S PacBio CCS, suggest 1000."/>
            <param
                name="max_len"
                type="integer"
                value="0"
                min="0"
                label="max_len: Int"
                help="[default: 0]  Remove reads prior to trimming or truncation which are longer than this value. If 0 is provided no reads will be removed based on length. For 16S PacBio CCS, suggest 1600."/>

            <!-- Denoising and chimera removal -->
            <param
                name="pooling_method"
                type="select"
                label="pooling_method: Str % Choices('independent', 'pseudo')"
                display="radio"
                help="[default: 'independent']  The method used to pool samples for denoising. &quot;independent&quot;: Samples are denoised independently. &quot;pseudo&quot;: The pseudo-pooling method is used to approximate pooling of samples. In short, samples are denoised independently once, ASVs detected in at least 2 samples are recorded, and samples are denoised independently a second time, but this time with prior knowledge of the recorded ASVs and thus higher sensitivity to those ASVs.">
                <option value="independent" selected="true">independent</option>
                <option value="pseudo">pseudo</option>
            </param>
            <param
                name="chimera_method"
                type="select"
                label="chimera_method: Str % Choices('consensus', 'none', 'pooled')"
                display="radio"
                help="[default: 'consensus']  The method used to remove chimeras. &quot;none&quot;: No chimera removal is performed. &quot;pooled&quot;: All reads are pooled prior to chimera detection. &quot;consensus&quot;: Chimeras are detected in samples individually, and sequences found chimeric in a sufficient fraction of samples are removed.">
                <option value="consensus" selected="true">consensus</option>
                <option value="none">none</option>
                <option value="pooled">pooled</option>
            </param>
            <!-- The help text requires a value of at least 1, which `min` now enforces. -->
            <param
                name="min_fold_parent_over_abundance"
                type="float"
                value="3.5"
                min="1"
                label="min_fold_parent_over_abundance: Float"
                help="[default: 3.5]  The minimum abundance of potential parents of a sequence being tested as chimeric, expressed as a fold-change versus the abundance of the sequence being tested. Values should be greater than or equal to 1 (i.e. parents should be more abundant than the sequence being tested). Suggest 3.5. This parameter has no effect if chimera_method is &quot;none&quot;."/>
            <param
                name="allow_one_off"
                type="boolean"
                truevalue="__q2galaxy__::literal::True"
                falsevalue="__q2galaxy__::literal::False"
                label="allow_one_off: Bool"
                help="[default: No]  Bimeras that are one-off from exact are also identified if the `allow_one_off` argument is True. If True, a sequence will be identified as bimera if it is one mismatch or indel away from an exact bimera."/>

            <!-- Resources and output naming; 0 threads means "use all available cores" -->
            <param
                name="n_threads"
                type="integer"
                value="1"
                min="0"
                label="n_threads: Int"
                help="[default: 1]  The number of threads to use for multithreaded processing. If 0 is provided, all available cores will be used."/>
            <param
                name="n_reads_learn"
                type="integer"
                value="1000000"
                min="1"
                label="n_reads_learn: Int"
                help="[default: 1000000]  The number of reads to use when training the error model. Smaller numbers will result in a shorter run time but a less reliable error model."/>
            <param
                name="hashed_feature_ids"
                type="boolean"
                truevalue="__q2galaxy__::literal::True"
                falsevalue="__q2galaxy__::literal::False"
                checked="true"
                label="hashed_feature_ids: Bool"
                help="[default: Yes]  If true, the feature ids in the resulting table will be presented as hashes of the sequences defining each feature. The hash will always be the same for the same sequence so this allows feature tables to be merged across runs of this method. You should only merge tables if the exact same parameters are used for each run."/>
        </section>
    </inputs>
    <!--
    Three QZA outputs. Each one is picked up from the file of the same name in the job
    working directory (from_work_dir) and labelled with the tool name and its input datasets.
    -->
    <outputs>
        <data
            name="table"
            format="qza"
            label="${tool.name} on ${on_string}: table.qza"
            from_work_dir="table.qza"/>
        <data
            name="representative_sequences"
            format="qza"
            label="${tool.name} on ${on_string}: representative_sequences.qza"
            from_work_dir="representative_sequences.qza"/>
        <data
            name="denoising_stats"
            format="qza"
            label="${tool.name} on ${on_string}: denoising_stats.qza"
            from_work_dir="denoising_stats.qza"/>
    </outputs>
    <!-- No functional tests are defined for this tool; the element is intentionally left empty here. -->
    <tests/>
    <help>
QIIME 2: dada2 denoise-ccs
==========================
Denoise and dereplicate single-end PacBio CCS


Outputs:
--------
:table.qza: The resulting feature table.
:representative_sequences.qza: The resulting feature sequences. Each feature in the feature table will be represented by exactly one sequence.
:denoising_stats.qza: Statistics from the denoising process (the plugin provides no further description).

|  

Description:
------------
This method denoises single-end PacBio CCS sequences, dereplicates them, and filters chimeras. Tutorial and workflow: https://github.com/benjjneb/LRASManuscript


|  

</help>
    <!--
    Publications to cite when using this tool, referenced by DOI.
    NOTE(review): these appear to be the DADA2 paper (nmeth.3869) and the QIIME 2 paper
    (s41587-019-0209-9); confirm before relying on this note.
    -->
    <citations>
        <citation type="doi">10.1038/nmeth.3869</citation>
        <citation type="doi">10.1038/s41587-019-0209-9</citation>
    </citations>
</tool>