view affiliation_filters.xml @ 3:8edcbafb3b4e draft

"planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 2024a13846ea6f9bd94ae62e3b2a5a3aba8cd304-dirty"
author frogs
date Tue, 24 Aug 2021 08:21:23 +0000
parents 094a2469204d
children 445b04a65ed8
line wrap: on
line source

<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_affiliation_filters" name="FROGS Affiliation Filters" version="@TOOL_VERSION@+galaxy2">
	<!-- One-line summary of the tool. -->
	<description>Filters OTUs on several affiliation criteria.</description>

	<!-- Shared definitions are imported from macros.xml; the "requirements"
	     macro expanded below is presumably defined there (not visible here). -->
	<macros>
		<import>macros.xml</import>
	</macros>

	<expand macro="requirements"/>

	<!-- Together the two ranges match every non-zero exit code (positive and
	     negative). No level is given, so Galaxy's default level applies. -->
	<stdio>
		<exit_code range="1:"/>
		<exit_code range=":-1"/>
	</stdio>

	<command><![CDATA[
		## Filter the OTUs of the input BIOM file on their RDP and Blast affiliations.
		## Every output path is handed explicitly to the script.
		affiliation_filters.py
			--input-biom '$input_biom'
			--input-fasta '$input_fasta'
			--output-biom '$output_biom'
			--impacted '$output_impacted'
			--impacted-multihit '$output_multihit'
			--summary '$output_summary'

			## The filtered FASTA is only requested in delete mode; any other mode masks.
			#if str($conditional_mode.mode) == "delete"
				--delete
				--output-fasta '$output_fasta'
			#else
				--mask
			#end if

			## Rendered unquoted so that each space-separated rank is its own argument
			## (presumably what the script expects; confirm against affiliation_filters.py).
			--taxonomic-ranks $taxonomic_ranks

			## RDP filter: emitted only when both the rank and the bootstrap are filled in.
			## The test is made on the string values: a quoted '...' literal inside a
			## Cheetah directive is a plain Python string and therefore always true.
			#if str($rdp.rdp_rank).strip() and str($rdp.rdp_bootstrap).strip() not in ('', 'None')
				--min-rdp-bootstrap '$rdp.rdp_rank:$rdp.rdp_bootstrap'
			#end if

			#if $blast.min_blast_length
				--min-blast-length $blast.min_blast_length
			#end if
			## Compare the string form so that an explicit e-value of 0 is still passed on.
			#if str($blast.max_blast_evalue).strip() not in ('', 'None')
				--max-blast-evalue $blast.max_blast_evalue
			#end if
			#if $blast.min_blast_identity
				--min-blast-identity $blast.min_blast_identity
			#end if
			#if $blast.min_blast_coverage
				--min-blast-coverage $blast.min_blast_coverage
			#end if

			## Keep only the repeat entries whose name is not blank: the name field is
			## optional, and a blank entry would otherwise be passed as an empty pattern.
			#set $kept_taxa = []
			#for $current in $blast.taxon_ignored
				#if str($current.name).strip()
					#silent $kept_taxa.append($current)
				#end if
			#end for
			#if $kept_taxa
				--ignore-blast-taxa
				#for $current in $kept_taxa
					'${current.name}'
				#end for
			#end if
	]]></command>
	<inputs>
		<!-- Input files: both are required by the form and always passed to the command. -->
		<param format="fasta" name="input_fasta" type="data" label="Sequences file" help="The sequence file to filter (format: fasta)." />
		<param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)." />
		<!-- Space-separated list of ranks; the command renders this value unquoted. -->
		<param name="taxonomic_ranks" type="text" label="Taxonomic ranks" help="The ordered taxonomic ranks levels stored in BIOM. Each rank is separated by one space." optional="false" value="Domain Phylum Class Order Family Genus Species" size="80" />
		<!-- The selected mode drives the delete/mask switch of the command and the
		     filter that makes the fasta output optional. Both branches are empty. -->
		<conditional name="conditional_mode">
			<param name="mode" type="select" label="Filtering mode" multiple="false" help="Do you want to delete OTU or hide affiliations" optional="false" display="radio">
				<option value="hide" selected="true">Hiding mode</option>
				<option value="delete">Deleting mode</option>
			</param>
			<when value="hide"/>
			<when value="delete"/>
		</conditional>

		<!-- Blast criteria: every field is optional and only used when filled in. -->
		<section name="blast" title="Filter on Blast affiliations" expanded="true" >
			<param name="max_blast_evalue" type="float" min="0.0" max="1.0" optional="true" label="Maximum e-value (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
			<param name="min_blast_identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
			<param name="min_blast_coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage % (between 0 and 1)" size="5" help="Fill the field only if you want this treatment" />
			<!-- min="0": a negative alignment length is meaningless, reject it in the form. -->
			<param name="min_blast_length" type="integer" min="0" optional="true" label="Minimum alignment length" size="5" help="Fill the field only if you want this treatment" />

			<!-- One entry per taxon name or word to look for in the Blast affiliations. -->
			<repeat name="taxon_ignored" title="Filter blast affiliations including these taxon / word">
				<param name="name" type="text" label="Full or partial taxon name" help='ex: "unknown species" or "subsp."' optional="true"/>
			</repeat>
		</section>

		<!-- RDP criteria: rank and bootstrap are combined into one rank:bootstrap argument. -->
		<section name="rdp" title="Filter on RDP affiliations" expanded="false">
			<param name="rdp_rank" type="text" label="Taxonomical rank on which to apply bootstrap filter" optional="true" help="One of the available taxonomical rank name. Ex: Species"/>
			<param name="rdp_bootstrap" type="float" min="0.0" max="1.0" label="Minimum bootstrap % (between 0 and 1)" size="5" optional="true" help="Fill these two fields if you want this treatment."/>
		</section>

	</inputs>
	<outputs>
		<!-- NOTE(review): the command already receives every dataset path explicitly,
		     so the from_work_dir attributes below look redundant. Confirm before removing. -->

		<!-- Filtered abundance file. -->
		<data name="output_biom"
			format="biom1"
			label="${tool.name}: affiFilter_abundance.biom"
			from_work_dir="affiFilter_abundance.biom"/>

		<!-- Filtered sequences: only created when the delete mode is selected. -->
		<data name="output_fasta"
			format="fasta"
			label="${tool.name}: affiFilter_sequences.fasta"
			from_work_dir="affiFilter_sequences.fasta">
			<filter> conditional_mode['mode'] == 'delete'</filter>
		</data>

		<!-- Tabular lists of the impacted OTUs and of their multi-affiliations. -->
		<data name="output_impacted"
			format="tabular"
			label="${tool.name}: impacted_OTU.tsv"
			from_work_dir="impacted.tsv"/>
		<data name="output_multihit"
			format="tabular"
			label="${tool.name}: impacted_OTU.multi-affiliations.tsv"
			from_work_dir="impacted.multihit.tsv"/>

		<!-- HTML report. -->
		<data name="output_summary"
			format="html"
			label="${tool.name}: report.html"
			from_work_dir="report.html"/>
	</outputs>
	<tests>
		<!-- Hide (mask) mode: all filters set, no fasta output is compared. -->
		<test>
			<param name="input_fasta" value="references/04-filters.fasta"/>
			<param name="input_biom" value="references/06-affiliation.biom"/>
			<conditional name="conditional_mode">
				<param name="mode" value="hide"/>
			</conditional>
			<param name="rdp_rank" value="Species"/>
			<param name="rdp_bootstrap" value="0.8"/>
			<param name="min_blast_length" value="150"/>
			<param name="max_blast_evalue" value="1e-150"/>
			<param name="min_blast_identity" value="1"/>
			<param name="min_blast_coverage" value="1"/>
			<repeat name="taxon_ignored">
				<param name="name" value="g__Sarcodon"/>
			</repeat>
			<repeat name="taxon_ignored">
				<param name="name" value="s__Trichoderma"/>
			</repeat>
			<output name="output_impacted" file="references/07-impacted_OTU_masked.tsv" compare="diff" lines_diff="0"/>
			<output name="output_summary" file="references/07-affiliation_masked.html" compare="diff" lines_diff="0"/>
			<output name="output_multihit" file="references/07-impacted_OTU_masked_multihit.tsv" compare="diff" lines_diff="0"/>
			<output name="output_biom" file="references/07-affiliation_masked.biom" compare="sim_size" delta="0"/>
		</test>
		<!-- Delete mode: same filters, the filtered fasta is compared as well. -->
		<test>
			<param name="input_fasta" value="references/04-filters.fasta"/>
			<param name="input_biom" value="references/06-affiliation.biom"/>
			<conditional name="conditional_mode">
				<param name="mode" value="delete"/>
			</conditional>
			<param name="rdp_rank" value="Species"/>
			<param name="rdp_bootstrap" value="0.8"/>
			<param name="min_blast_length" value="150"/>
			<param name="max_blast_evalue" value="1e-150"/>
			<param name="min_blast_identity" value="1"/>
			<param name="min_blast_coverage" value="1"/>
			<repeat name="taxon_ignored">
				<param name="name" value="g__Sarcodon"/>
			</repeat>
			<repeat name="taxon_ignored">
				<param name="name" value="s__Trichoderma"/>
			</repeat>
			<output name="output_impacted" file="references/07-impacted_OTU_deleted.tsv" compare="diff" lines_diff="0"/>
			<output name="output_summary" file="references/07-affiliation_deleted.html" compare="diff" lines_diff="0"/>
			<output name="output_multihit" file="references/07-impacted_OTU_deleted_multihit.tsv" compare="diff" lines_diff="0"/>
			<output name="output_biom" file="references/07-affiliation_deleted.biom" compare="sim_size" delta="0"/>
			<output name="output_fasta" file="references/07-affiliation_deleted.fasta" compare="diff" lines_diff="0"/>
		</test>
	</tests>
	<help>

@HELP_LOGO@

.. class:: infomark page-header h2

What it does

Remove OTUs or hide taxonomical metadata according to one (or more) criteria:

 - for RDP taxonomy : a minimal bootstrap threshold at a specific rank

 - for Blast taxonomy : a minimal identity rate, coverage rate, or alignment length, or a maximal evalue, or the absence of full or partial taxon name.


.. class:: infomark page-header h2

Inputs/outputs


.. class:: h3

Inputs

**Abundance file**:

The abundance of each OTU with taxonomical metadata (format `BIOM &lt;http://biom-format.org/&gt;`_).

**Sequence file**:

The OTUs seed sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_). This file is always required by this form; the filtered sequences are only written when using deleting mode.


.. class:: h3

Outputs

**Abundance file** (abundance.biom):

 The abundance and updated taxonomical metadata after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).

**Sequence file** (sequences.fasta):

 The updated sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_), in case of using deleting mode.

**Impacted abundance tabular file** (impacted.tsv):

 The list of the OTUs deleted/hidden or with updated blast affiliation (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

**Impacted multihit tabular file** (impacted.multihit.tsv):

 The list of blast affiliations for multi-affiliated impacted OTU (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

**Summary file** (report.html):

 A summary report HTML of impacted OTUs, taxonomies lost and details by samples (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).

.. class:: infomark page-header h2

How it works

.. csv-table::
   :header: "Steps", "description"
   :widths: 10, 150
   :class: table table-striped

   "1", "Compare all affiliations (RDP and all Blast hits) to each filtering criterion."
   "2", "Update consensus Blast taxonomy if at least one blast criterion has an impact. When only part of the Blast affiliations are removed, the consensus Blast taxonomy is updated, and if it changes, the OTU is considered as *Modified*."
   "3", "**In deleting mode**, OTU is removed either if the RDP bootstrap criterion is not respected or if none of the blast affiliations respects all the criteria. **In hiding mode**, RDP affiliation is hidden (removed) if the RDP bootstrap criterion is not respected, and independently, blast affiliations are hidden (removed) if none of the blast affiliations respects all the criteria."
   "4", "Write valid OTU into the filtered.biom file with potential Blast affiliation updated and impacting criteria annotations. All impacted/modified OTU with their original metadata are reported in the impacted.tsv file with its associated impacted.multihit.tsv file. Impacting status (OTU_deleted / Affiliation_masked / Blast_taxonomy_changed) and the list of impacting criteria are also reported in the impacted.tsv file."


.. class:: infomark page-header h2

Advices

Imagine that you want to filter out affiliations with:

- RDP bootstrap at the Species level less than 0.8
- Blast %identity and %coverage less than 100
- Blast taxonomies with "unknown species"

What will happen in these different cases in deleting or hiding mode:

- Cluster 1:

.. image:: static/images/FROGS_affiliation_filter1.png

The RDP taxonomy does not respect the RDP bootstrap threshold, but all blast affiliation criteria are respected.

In deleting mode, Cluster_1 will be removed.
In the summary.html, it will be considered as "Removed", and if the RDP taxonomy and/or the Blast taxonomy is/are not kept thanks to another OTU, the RDP/Blast taxonomy(ies) will be considered as lost.

In hiding mode, RDP taxonomy will be removed, blast taxonomy will remain unchanged and Cluster_1 will be kept.
In the summary.html, it will be considered as "Hidden", and if the RDP taxonomy is not kept thanks to another OTU, the RDP taxonomy will be considered as lost.

- Cluster 2:

.. image:: static/images/FROGS_affiliation_filter2.png

The RDP taxonomy respects the RDP bootstrap threshold, but none of the blast affiliations respects all the blast criteria.

In deleting mode, Cluster_2 will be removed.
In the summary.html, it will be considered as "Removed", and if the two blast taxonomies are not kept thanks to other OTUs, they will be considered as lost. In the same way, if there is no other OTU affiliated to the Sulfurimonas genus but with an ambiguous species, 1 Multi-affiliation will be considered as lost in the summary.html. The same applies to the RDP taxonomy.

In hiding mode, RDP taxonomy will remain unchanged, blast taxonomy will be removed and Cluster_2 will be kept.
In the summary.html, it will be considered as "Hidden", and if the two taxonomies are not kept thanks to other OTUs, they will be considered as lost. In the same way, if there is no other OTU affiliated to the Sulfurimonas genus but with an ambiguous species, 1 Multi-affiliation will be considered as lost in the summary.html.

- Cluster 3:

.. image:: static/images/FROGS_affiliation_filter3.png

The RDP taxonomy respects the RDP bootstrap threshold, and one of the two blast affiliations respects all the blast criteria.

In both deleting and hiding mode, Cluster_3 will be kept.
In the summary.html, it will be considered as "Modified", as the RDP taxonomy will remain unchanged and the blast taxonomy will be updated.
If no other OTU is affiliated to the "unknown species" of the Fusobacterium genus, this species will be considered as lost. In the same way, if no other OTU is affiliated to the Fusobacterium genus but with an ambiguous species, 1 Multi-affiliation at the Species level will be considered as lost in the summary.html.

@HELP_CONTACT@

	</help>

	<!-- The citation entries come from the "citations" macro (presumably defined in macros.xml). -->
	<citations>
		<expand macro="citations"/>
	</citations>
	
</tool>