Mercurial > repos > frogs > frogs

<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_remove_chimera" name="FROGS Remove chimera" version="@TOOL_VERSION@+galaxy2">
	<description>Remove PCR chimera in each sample.</description>

	<!-- Shared definitions are imported from macros.xml (presumably the macros and @...@ tokens used in this file; confirm against macros.xml). -->
	<macros>
		<import>macros.xml</import>
	</macros>

	<!-- Expands the "requirements" macro with the vsearch 2.17.0 package requirement as its body. -->
	<expand macro="requirements">
		<requirement type="package" version="2.17.0">vsearch</requirement>
	</expand>

	<!-- Exit-code rules: codes of 1 and above, or -1 and below (any non-zero code), match. No level attribute is set, so the Galaxy default level applies. -->
	<stdio>
		<exit_code range="1:"/>
		<exit_code range=":-1"/>
	</stdio>
	<!-- Runs remove_chimera.py on the selected FASTA file; the abundance input/output options follow the chosen abundance type. -->
	<command><![CDATA[
		remove_chimera.py
			--nb-cpus \${GALAXY_SLOTS:-1}
			--input-fasta '$sequence_file'
			--non-chimera '$non_chimera_fasta'
			--summary '$summary_file'
			#if $abundance_type.abundance_type_selected == "biom"
				--input-biom '$abundance_biom'
				--out-abundance '$out_abundance_biom'
			#else
				--input-count '$abundance_count'
				--out-abundance '$out_abundance_count'
			#end if
	]]></command>
	<inputs>
		<!-- Sequences to filter (FASTA) -->
		<param name="sequence_file" type="data" format="fasta" optional="false" label="Sequences file" help="The sequences file (format: fasta)." />
		<!-- Abundance source: the selected option decides which file parameter is shown -->
		<conditional name="abundance_type">
			<param name="abundance_type_selected" type="select" label="Abundance type" help="Select the type of file where the abundance of each sequence by sample is stored.">
				<option value="biom" selected="true">BIOM file</option>
				<option value="count">TSV file</option>
			</param>
			<when value="biom">
				<param name="abundance_biom" type="data" format="biom1" optional="false" label="Abundance file" help="It contains the count by sample for each sequence." />
			</when>
			<when value="count">
				<param name="abundance_count" type="data" format="tabular" optional="false" label="Count file" help="It contains the count by sample for each sequence (see below)." />
			</when>
		</conditional>
	</inputs>
	<outputs>
		<!--
			Four outputs are declared: the filtered FASTA, the HTML report, and one abundance
			file whose format (BIOM or tabular) is chosen by the filters below.
			NOTE(review): each output path is also passed to remove_chimera.py on the command
			line while from_work_dir names a working-directory file; confirm both are needed.
		-->
		<data name="non_chimera_fasta" format="fasta" from_work_dir="non_chimera.fasta" label="${tool.name}: non_chimera.fasta"/>
		<data name="out_abundance_biom" format="biom1" from_work_dir="non_chimera_abundance.biom" label="${tool.name}: non_chimera_abundance.biom">
			<filter>abundance_type['abundance_type_selected'] == "biom"</filter>
		</data>
		<data name="out_abundance_count" format="tabular" from_work_dir="non_chimera_abundance.tsv" label="${tool.name}: non_chimera_abundance.tsv">
			<filter>abundance_type['abundance_type_selected'] == "count"</filter>
		</data>
		<data name="summary_file" format="html" from_work_dir="report.html" label="${tool.name}: report.html"/>
	</outputs>
	<tests>
		<!-- Single test covering the BIOM abundance branch; the TSV (count) branch has no test here. -->
		<test>
			<param name="sequence_file" value="references/02-clustering_fastidious.fasta"/>
			<conditional name="abundance_type">
				<param name="abundance_type_selected" value="biom"/>
				<param name="abundance_biom" value="references/02-clustering_fastidious.biom"/>
			</conditional>
			<!-- FASTA and HTML report are compared with diff, allowing zero differing lines -->
			<output
				name="non_chimera_fasta"
				file="references/03-chimera.fasta"
				compare="diff"
				lines_diff="0"/>
			<output
				name="summary_file"
				file="references/03-chimera.html"
				compare="diff"
				lines_diff="0"/>
			<!-- BIOM output is compared by file size only, with no size tolerance -->
			<output
				name="out_abundance_biom"
				file="references/03-chimera.biom"
				compare="sim_size"
				delta="0"/>
		</test>
	</tests>
	<help>

@HELP_LOGO@

.. class:: infomark page-header h2

What it does

Remove chimeric sequences by sample.


.. class:: infomark page-header h2

Context

Chimeras are sequences formed from two or more biological sequences joined together.

The majority of these anomalous sequences are formed from an incomplete extension during a PCR cycle. During subsequent cycles, a partially extended strand can bind to a template derived from a different but similar sequence.

This phenomenon is particularly common in amplicon sequencing, where closely related sequences are amplified.


.. class:: infomark page-header h2

Inputs/Outputs

.. class:: h3

Inputs

**Sequence file**:

The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).

**Abundance file**:

 The abundance of each cluster in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).

OR

 The abundance of each sequence in each sample (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_). This type of file is produced by *FROGS pre-process*.

 Example:

.. csv-table::
   :header: "#id", "splA","splB"
   :widths: 10,10,10
   :class: table table-striped

    "seq1", "1289", "2901"
    "seq2", "3415", "0"

.. class:: h3

Outputs

**Sequence file** (non_chimera.fasta):

 The sequence file containing only non-chimeric sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).

**Abundance file** (non_chimera_abundance.biom or non_chimera_abundance.tsv):

 The abundance file containing only non-chimeric sequences (same format as the abundance input).

**Summary file** (report.html):

 This file presents the number of removed elements (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).


.. class:: infomark page-header h2

How it works

.. csv-table::
   :header: "Steps", "Description"
   :widths: 10, 90
   :class: table table-striped

   "1", "Split input data by sample (classically, the PCR is performed per sample)."
   "2", "Find chimera in each sample (`vsearch &lt;https://github.com/torognes/vsearch&gt;`_)."
   "3", "Remove the sequences identified as chimeras in all samples where they are present."


@HELP_CONTACT@


	</help>

	<!-- Citation entries come from the "citations" macro; presumably defined in the imported macros.xml (not visible here; confirm). -->
	<citations>
		<expand macro="citations" />
	</citations>

</tool>
author	frogs
date	Tue, 24 Aug 2021 08:21:23 +0000
parents	094a2469204d
children	445b04a65ed8