diff ncbi_fcs_gx.xml @ 0:3cdb96f2855d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_fcs_gx commit 4a6561ed00e004260be3f3c29d81e814c60e20af
author iuc
date Fri, 12 Jan 2024 22:11:39 +0000
parents
children 49f8eae39606
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ncbi_fcs_gx.xml	Fri Jan 12 22:11:39 2024 +0000
@@ -0,0 +1,167 @@
+<tool id="ncbi_fcs_gx" name="NCBI FCS GX" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>detects contamination from foreign organisms in genome sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+GX_NUM_CORES=\${GALAXY_SLOTS:-2}
+#if $mode.mode_selector == "screen"
+    ## copy data to local storage
+    #set manifest_pathname = $mode.screen_adv.database.fields.name
+    #if $mode.config_tag.fields.use_source_manifest == "1"
+        #set manifest_pathname = $mode.screen_adv.database.fields.source_manifest
+    #end if
+    mkdir -p '$mode.config_tag.fields.node_cache_dir' &&
+    sync_files.py get --mft '$manifest_pathname' --dir '$mode.config_tag.fields.node_cache_dir' > /dev/null 2>&1 &&
+    ## run gx
+    run_gx.py
+    #if $mode.config_tag.fields.phone_home == "1"
+        --phone-home-label '$mode.config_tag.fields.phone_home_label'
+    #end if
+        --fasta '$mode.fasta'
+    #if $mode.id.id_selector == "gx_div"
+        --tax-id '1'
+        --div '$mode.id.div'
+    #else
+        --tax-id '$mode.id.tax_id'
+    #end if
+    #if $mode.species != ""
+        --species '$mode.species'
+    #end if
+        --split-fasta '$mode.screen_adv.split_fasta'
+    #if $mode.screen_adv.div:
+        --div '$mode.screen_adv.div'
+    #end if
+        --gx-db '$mode.config_tag.fields.node_cache_dir'
+        --out-basename output
+        --action-report true
+        --generate-logfile false
+#elif $mode.mode_selector == "clean"
+    ## run gx
+    gx clean-genome
+        --input '$mode.input'
+        --action-report '$mode.action_report'
+        --contam-fasta-out 'contam.fa'
+        --min-seq-len '$mode.min_seq_len'
+        --output 'clean.fa'
+#end if
+    ]]></command>
+    <environment_variables>
+        <environment_variable name="GX_ALIGN_EXCLUDE_TAXA">$getVar('mode.screen_adv.gx_align_exclude_taxa', '')</environment_variable>
+        <environment_variable name="GX_EXTRA_CONTAM_DIVS"><![CDATA[#echo ','.join($getVar('mode.screen_adv.gx_extra_contam_divs', []))]]></environment_variable>
+    </environment_variables>
+    <inputs>
+        <conditional name="mode">
+            <param name="mode_selector" type="select" label="Choose the mode">
+                <option value="screen" selected="true">Screen genome</option>
+                <option value="clean">Clean genome</option>
+            </param>
+            <when value="screen">
+                <!-- value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir -->
+                <param name="config_tag" type="select" label="Database">
+                    <options from_data_table="ncbi_fcs_gx_config">
+                        <filter type="sort_by" name="sorted_description" column="1"/>
+                    </options>
+                    <validator message="No database is available" type="no_options"/>
+                </param>
+                <param argument="--fasta" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
+                <conditional name="id">
+                    <param name="id_selector" type="select" label="Taxonomy entry">
+                        <option value="gx_div" selected="true">GX Division</option>
+                        <option value="ncbi_tax">NCBI Taxonomic identifier</option>
+                    </param>
+                    <when value="gx_div">
+                        <param argument="--div" type="select">
+                            <options from_data_table="ncbi_fcs_gx_divisions">
+                                <filter type="param_value" ref="config_tag" column="1" />
+                                <filter type="sort_by" name="sorted_description" column="2" />
+                            </options>
+                            <validator message="No GX Divisions are available" type="no_options"/>
+                        </param>
+                    </when>
+                    <when value="ncbi_tax">
+                        <!-- https://www.ncbi.nlm.nih.gov/taxonomy -->
+                        <param argument="--tax-id" type="text" label="Taxonomic identifier" help="The appropriate tax-id for your genome assembly.  The appropriate tax-id for an organism can be retrieved from the NCBI Taxonomy website."/>
+                    </when>
+                </conditional>
+                <param argument="--species" type="text" optional="true" label="Species binomial name"/>
+                <section name="screen_adv" title="Advanced options">
+                    <!-- comma separated list of taxa to ignore in GX_ALIGN_EXCLUDE_TAXA environment variable -->
+                    <param name="gx_align_exclude_taxa" type="text" value="" optional="true" label="Taxonomic identifier(s) to exclude" help="Multiple tax-ids may be provided as a comma-separated list.">
+                        <validator type="regex" message="comma separated integers">^\s*\d+\s*(,\s*\d+\s*)*$</validator>
+                        <sanitizer invalid_char="">
+                            <valid initial="string.digits">
+                                <add value=","/>
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param name="gx_extra_contam_divs" type="select" multiple="true" optional="true" label="Additional contaminants to identify" help="Multiple gx-divisions may be selected.">
+                        <options from_data_table="ncbi_fcs_gx_divisions">
+                            <filter type="param_value" ref="config_tag" column="1" />
+                            <filter type="sort_by" name="sorted_description" column="2" />
+                        </options>
+                        <validator message="No GX Divisions are available" type="no_options"/>
+                    </param>
+                    <param argument="--split-fasta" type="boolean" checked="true" optional="true" label="Split fasta sequences on N-runs of length at least 10"/>
+                    <param argument="--div" type="text" value="" optional="true" label="BLAST-div of the tax-id" help="from 'NCBI BLAST name' on taxon Info page"/>
+                    <param name="database" type="select" label="Database location">
+                        <options from_data_table="ncbi_fcs_gx_databases">
+                            <filter type="param_value" ref="config_tag" column="0"/>
+                        </options>
+                        <validator message="No database location is available" type="no_options"/>
+                    </param>
+                </section>
+            </when>
+            <when value="clean">
+                <param argument="--input" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
+                <param argument="--action-report" type="data" format="tabular" label="Select Action report"/>
+                <param argument="--min-seq-len" type="integer" value="200" label="Minimumm sequence length to keep"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- mode == screen -->
+        <data name="taxonomy_report" format="tabular" from_work_dir="output.taxonomy.rpt" label="${tool.name} on ${on_string}: Taxonomy report">
+            <filter>mode['mode_selector'] == 'screen'</filter>
+        </data>
+        <data name="action_report" format="tabular" from_work_dir="output.fcs_gx_report.txt" label="${tool.name} on ${on_string}: Action report">
+            <filter>mode['mode_selector'] == 'screen'</filter>
+        </data>
+        <!-- mode == clean -->
+        <data name="contam_fasta" format="fasta" from_work_dir="contam.fa" label="${tool.name} on ${on_string}: Fasta for EXCLUDE entries">
+            <filter>mode['mode_selector'] == 'clean'</filter>
+        </data>
+        <data name="clean_fasta" format="fasta" from_work_dir="clean.fa" metadata_source="mode.input" label="${tool.name} on ${on_string}: Cleaned Fasta">
+            <filter>mode['mode_selector'] == 'clean'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="mode_selector" value="screen"/>
+            <param name="config_tag" value="test-only" />
+            <param name="id_selector" value="ncbi_tax"/>
+            <param name="fasta" value="fcsgx_test.fa.gz" ftype="fasta"/>
+            <param name="tax_id" value="6973"/>
+            <output name="taxonomy_report" file="output.taxonomy.rpt" compare="diff" lines_diff="2" />
+            <output name="action_report" file="output.fcs_gx_report.txt" compare="diff" lines_diff="2" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="mode_selector" value="clean"/>
+            <param name="id_selector" value="ncbi_tax"/>
+            <param name="input" value="fcsgx_test.fa.gz" ftype="fasta"/>
+            <param name="action_report" value="output.fcs_gx_report.txt" ftype="tabular"/>
+            <output name="contam_fasta" decompress="true" file="output.contam.fa.gz" ftype="fasta" />
+            <output name="clean_fasta" decompress="true" file="output.clean.fa.gz" ftype="fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
+    FCS-GX detects contamination from foreign organisms in genome sequences using the genome cross-species aligner (GX). The FCS-GX executable retrieves a Docker or Singularity container and runs a pipeline to align sequences to a large database of NCBI genomes through modified k-mer seeds and assign a most likely taxonomic division.
+
+    FCS-GX classifies sequences as contaminant when their taxonomic assignment is different from the user provided taxonomic identifier. A contamination summary provides an overview of observed contaminant divisions, counts, and total sizes, and an action report provides details and recommended actions for each problematic sequence.
+
+    https://github.com/ncbi/fcs/wiki/FCS-GX
+    ]]></help>
+    <expand macro="citations"/>
+</tool>