view affiliation_postprocess.xml @ 3:8edcbafb3b4e draft

"planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 2024a13846ea6f9bd94ae62e3b2a5a3aba8cd304-dirty"
author frogs
date Tue, 24 Aug 2021 08:21:23 +0000
parents 094a2469204d
children 445b04a65ed8
line wrap: on
line source

<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_affiliation_postprocess" name="FROGS Affiliation postprocess" version="@TOOL_VERSION@+galaxy2">
	<description>Optional step to resolve inclusive amplicon ambiguities and to aggregate OTUs based on alignment metrics</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>

  	<stdio>
		<!-- Any non-zero exit status, positive or negative, marks the job as failed. -->
		<exit_code range="1:" level="fatal" />
		<exit_code range=":-1" level="fatal" />
	</stdio>
	<!--
		Cheetah template for the affiliation_postprocess.py call.
		The reference option is only emitted when the HVL_amplicon switch of the
		is_HVL conditional is enabled; it receives the "path" field of the selected
		ref_file entry. All other options are always passed.
	-->
	<command><![CDATA[
		affiliation_postprocess.py
		#if $is_HVL.HVL_amplicon
			--reference '$is_HVL.ref_file.fields.path'
		#end if
		--identity $identity
		--coverage $coverage
		--input-biom '$input_biom'
		--input-fasta '$input_fasta'
		--output-biom '$biom_out'
		--output-fasta '$fasta_out'
		--output-compo '$compo_out'
	]]></command>
	<inputs>
		<!-- Input datasets: a FASTA sequence file and a BIOM abundance file. -->
		<param name="input_fasta" type="data" format="fasta" label="Sequences file" help="The sequence file to filter (format: fasta)." />
		<param name="input_biom" type="data" format="biom1" label="Abundance file" help="The abundance file to filter (format: BIOM)." />

		<!--
			Optional reference selection. When HVL_amplicon is true, the user picks an
			entry from the frogs_HVL_db data table; the command template reads the
			"path" field of that entry. When false, no extra parameter is requested.
		-->
		<conditional name="is_HVL">
			<param name="HVL_amplicon" type="boolean" label="Is this an amplicon hyper variable in length?" help="Multi-affiliation tag may be resolved by selecting the shortest amplicon reference. For this you need the reference fasta file of your targeted amplicon." />
			<when value="true">
				<param name="ref_file" type="select" label="Using reference database" help="Select reference from the list">
					<options from_data_table="frogs_HVL_db" />
					<!-- Reject the form when the data table provides no entry. -->
					<validator type="no_options" message="A built-in database is not available" />
					<!-- Disabled explicit column declarations, kept for reference: -->
					<!--column name="name" index="0"/>
					<column name="value" index="1"/-->
				</param>
			</when>
			<when value="false" />
		</conditional>

		<!-- Aggregation thresholds, both expressed as percentages in [0, 100]. -->
		<param name="identity" type="float" min="0.0" max="100.0" value="99.0" label="minimum identity for aggregation" help="OTUs will be aggregated if they share the same taxonomy with at least X% identity." />
		<param name="coverage" type="float" min="0.0" max="100.0" value="99.0" label="minimum coverage for aggregation" help="OTUs will be aggregated if they share the same taxonomy with at least X% alignment coverage." />
	</inputs>
	<outputs>
		<!--
			Three result datasets: the post-processed abundance table (BIOM), the
			matching sequences (FASTA) and a tabular aggregation composition report.
			Their paths are also handed to the script through the output options of
			the command template.
		-->
		<data name="biom_out" format="biom1" from_work_dir="affiliation.biom" label="${tool.name}: affiliation_abundance.biom" />
		<data name="fasta_out" format="fasta" from_work_dir="sequences.fasta" label="${tool.name}: sequences.fasta" />
		<data name="compo_out" format="tabular" from_work_dir="aggregation_composition.txt" label="${tool.name}: OTU_aggregation_composition.txt" />
	</outputs>
 	<tests>
		<!--
			Single regression test with the reference option enabled.
			identity and coverage are not set here, so their declared default
			values (99.0) apply.
		-->
		<test>
			<param name="input_fasta" value="references/04-filters.fasta" />
			<param name="input_biom" value="references/06-affiliation.biom" />
			<conditional name="is_HVL">
				<param name="HVL_amplicon" value="true" />
				<param name="ref_file" value="Unite_extract_ITS1_test" />
			</conditional>
			<!-- Text outputs must match exactly; the BIOM output is compared by size only. -->
			<output name="biom_out" file="references/08-affiliation_postprocessed.biom" compare="sim_size" delta="0" />
			<output name="fasta_out" file="references/08-affiliation_postprocessed.fasta" compare="diff" lines_diff="0" />
			<output name="compo_out" file="references/08-affiliation_postprocessed.compo.tsv" compare="diff" lines_diff="0" />
		</test>
	</tests>
	<help>

@HELP_LOGO@

.. class:: infomark page-header h2

What it does

Resolves multi-hit ambiguities when exact amplicon lengths are available, and aggregates OTUs sharing the same taxonomy based on alignment metric thresholds.


.. class:: infomark page-header h2

Inputs/outputs

.. class:: h3

Inputs

**Abundance file**:

The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_) with taxonomic affiliations metadata.

**Sequence file**:

The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each OTU seed.

**Reference file** (optional):

The exact amplicon reference sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).


.. class:: h3

Outputs

**Abundance file**:

The abundance file of OTUs and aggregated OTUs, with their affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_) and with potentially fewer ambiguities.

**Sequence file**:

The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each aggregated OTU seed.

**Composition file**:

The aggregation composition file (format text) describing the composition of each resulting OTU.

.. class:: infomark page-header h2

How it works

If a reference FASTA file is provided, then for each OTU with a multi-affiliation, only the affiliation of the shortest reference sequence among the possible affiliations is kept. The aim is to resolve ambiguities caused by potentially inclusive sequences, such as ITS.

The second step aggregates OTUs that share the same taxonomy, based on alignment metrics.
We start with the most abundant OTU. If an OTU shares at least one affiliation with another OTU with at least I% identity and C% alignment coverage, the OTUs are aggregated together: the different affiliations are merged, the blast consensus taxonomy is updated and the abundance counts are summed. The seed of the most abundant OTU is kept.


@HELP_CONTACT@

	</help>

	<citations>
		<expand macro="citations" />
	</citations>
	
</tool>