Mercurial > repos > iuc > ncbi_fcs_gx
view ncbi_fcs_gx.xml @ 1:49f8eae39606 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_fcs_gx commit b1797a2dee3977cdf40d3cf413ab9ec1e0cb3f26
author | iuc |
---|---|
date | Thu, 30 May 2024 10:49:36 +0000 |
parents | 3cdb96f2855d |
children | ed1ef564da41 |
line wrap: on
line source
<tool id="ncbi_fcs_gx" name="NCBI FCS GX" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>detects contamination from foreign organisms in genome sequences</description>
    <!--
        Galaxy wrapper with two modes:
          * screen: syncs the selected database into the node cache directory,
                    then runs run_gx.py on the input fasta.
          * clean:  runs "gx clean-genome" on a fasta using an action report.
        Tokens (@TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@) and the expanded
        macros (biotools, requirements, citations) are defined in macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Galaxy joins the rendered lines into one shell command, so a bare
## VAR=value prefix would only reach the first command of the chain (mkdir).
## Export the variable so that every later command inherits it.
export GX_NUM_CORES=\${GALAXY_SLOTS:-2} &&

#if $mode.mode_selector == "screen"
    ## copy data to local storage
    #set manifest_pathname = $mode.screen_adv.database.fields.name
    #if $mode.config_tag.fields.use_source_manifest == "1"
        #set manifest_pathname = $mode.screen_adv.database.fields.source_manifest
    #end if
    mkdir -p '$mode.config_tag.fields.node_cache_dir' &&
    sync_files.py get
        --mft '$manifest_pathname'
        --dir '$mode.config_tag.fields.node_cache_dir'
        > /dev/null 2>&1 &&

    ## run gx
    run_gx.py
    #if $mode.config_tag.fields.phone_home == "1"
        --phone-home-label '$mode.config_tag.fields.phone_home_label'
    #end if
        --fasta '$mode.fasta'
    #if $mode.id.id_selector == "gx_div"
        --tax-id '1'
        --div '$mode.id.div'
    #else
        --tax-id '$mode.id.tax_id'
    #end if
    #if $mode.species != ""
        --species '$mode.species'
    #end if
        --split-fasta '$mode.screen_adv.split_fasta'
    ## the advanced BLAST-div, when filled in, is appended after any --div given above
    #if $mode.screen_adv.div
        --div '$mode.screen_adv.div'
    #end if
        --gx-db '$mode.config_tag.fields.node_cache_dir'
        --out-basename output
        --action-report true
        --generate-logfile false
#elif $mode.mode_selector == "clean"
    ## run gx
    gx clean-genome
        --input '$mode.input'
        --action-report '$mode.action_report'
        --contam-fasta-out 'contam.fa'
        --min-seq-len '$mode.min_seq_len'
        --output 'clean.fa'
#end if
    ]]></command>
    <!-- Both variables fall back to an empty value when the screen-only parameters are not defined (clean mode). -->
    <environment_variables>
        <environment_variable name="GX_ALIGN_EXCLUDE_TAXA">$getVar('mode.screen_adv.gx_align_exclude_taxa', '')</environment_variable>
        <environment_variable name="GX_EXTRA_CONTAM_DIVS"><![CDATA[#echo ','.join($getVar('mode.screen_adv.gx_extra_contam_divs', []))]]></environment_variable>
    </environment_variables>
    <inputs>
        <conditional name="mode">
            <param name="mode_selector" type="select" label="Choose the mode">
                <option value="screen" selected="true">Screen genome</option>
                <option value="clean">Clean genome</option>
            </param>
            <when value="screen">
                <!-- value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir -->
                <param name="config_tag" type="select" label="Database">
                    <options from_data_table="ncbi_fcs_gx_config">
                        <filter type="sort_by" name="sorted_description" column="1"/>
                    </options>
                    <validator message="No database is available" type="no_options"/>
                </param>
                <param argument="--fasta" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
                <conditional name="id">
                    <param name="id_selector" type="select" label="Taxonomy entry">
                        <option value="gx_div" selected="true">GX Division</option>
                        <option value="ncbi_tax">NCBI Taxonomic identifier</option>
                    </param>
                    <when value="gx_div">
                        <param argument="--div" type="select">
                            <options from_data_table="ncbi_fcs_gx_divisions">
                                <filter type="param_value" ref="config_tag" column="1"/>
                                <filter type="sort_by" name="sorted_description" column="2"/>
                            </options>
                            <validator message="No GX Divisions are available" type="no_options"/>
                        </param>
                    </when>
                    <when value="ncbi_tax">
                        <!-- https://www.ncbi.nlm.nih.gov/taxonomy -->
                        <param argument="--tax-id" type="text" label="Taxonomic identifier" help="The appropriate tax-id for your genome assembly. The appropriate tax-id for an organism can be retrieved from the NCBI Taxonomy website."/>
                    </when>
                </conditional>
                <param argument="--species" type="text" optional="true" label="Species binomial name"/>
                <section name="screen_adv" title="Advanced options">
                    <!-- comma separated list of taxa to ignore in GX_ALIGN_EXCLUDE_TAXA environment variable -->
                    <param name="gx_align_exclude_taxa" type="text" value="" optional="true" label="Taxonomic identifier(s) to exclude" help="Multiple tax-ids may be provided as a comma-separated list.">
                        <validator type="regex" message="comma separated integers">^\s*\d+\s*(,\s*\d+\s*)*$</validator>
                        <sanitizer invalid_char="">
                            <valid initial="string.digits">
                                <add value=","/>
                            </valid>
                        </sanitizer>
                    </param>
                    <!-- selected values are comma-joined into the GX_EXTRA_CONTAM_DIVS environment variable -->
                    <param name="gx_extra_contam_divs" type="select" multiple="true" optional="true" label="Additional contaminants to identify" help="Multiple gx-divisions may be selected.">
                        <options from_data_table="ncbi_fcs_gx_divisions">
                            <filter type="param_value" ref="config_tag" column="1"/>
                            <filter type="sort_by" name="sorted_description" column="2"/>
                        </options>
                        <validator message="No GX Divisions are available" type="no_options"/>
                    </param>
                    <param argument="--split-fasta" type="boolean" checked="true" optional="true" label="Split fasta sequences on N-runs of length at least 10"/>
                    <param argument="--div" type="text" value="" optional="true" label="BLAST-div of the tax-id" help="from 'NCBI BLAST name' on taxon Info page"/>
                    <param name="database" type="select" label="Database location">
                        <options from_data_table="ncbi_fcs_gx_databases">
                            <filter type="param_value" ref="config_tag" column="0"/>
                        </options>
                        <validator message="No database location is available" type="no_options"/>
                    </param>
                </section>
            </when>
            <when value="clean">
                <param argument="--input" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
                <param argument="--action-report" type="data" format="tabular" label="Select Action report"/>
                <param argument="--min-seq-len" type="integer" value="200" label="Minimum sequence length to keep"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- mode == screen -->
        <data name="taxonomy_report" format="tabular" from_work_dir="output.taxonomy.rpt" label="${tool.name} on ${on_string}: Taxonomy report">
            <filter>mode['mode_selector'] == 'screen'</filter>
        </data>
        <data name="action_report" format="tabular" from_work_dir="output.fcs_gx_report.txt" label="${tool.name} on ${on_string}: Action report">
            <filter>mode['mode_selector'] == 'screen'</filter>
        </data>
        <!-- mode == clean -->
        <data name="contam_fasta" format="fasta" from_work_dir="contam.fa" label="${tool.name} on ${on_string}: Fasta for EXCLUDE entries">
            <filter>mode['mode_selector'] == 'clean'</filter>
        </data>
        <data name="clean_fasta" format="fasta" from_work_dir="clean.fa" metadata_source="mode.input" label="${tool.name} on ${on_string}: Cleaned Fasta">
            <filter>mode['mode_selector'] == 'clean'</filter>
        </data>
    </outputs>
    <tests>
        <!-- screen mode, taxonomy given as an NCBI tax-id -->
        <test expect_num_outputs="2">
            <param name="mode_selector" value="screen"/>
            <param name="config_tag" value="test-only"/>
            <param name="id_selector" value="ncbi_tax"/>
            <param name="fasta" value="fcsgx_test.fa.gz" ftype="fasta"/>
            <param name="tax_id" value="6973"/>
            <output name="taxonomy_report" file="output.taxonomy.rpt" compare="diff" lines_diff="2"/>
            <output name="action_report" file="output.fcs_gx_report.txt" compare="diff" lines_diff="2"/>
        </test>
        <!-- clean mode; only parameters defined under the "clean" branch are set here -->
        <test expect_num_outputs="2">
            <param name="mode_selector" value="clean"/>
            <param name="input" value="fcsgx_test.fa.gz" ftype="fasta"/>
            <param name="action_report" value="output.fcs_gx_report.txt" ftype="tabular"/>
            <output name="contam_fasta" decompress="true" file="output.contam.fa.gz" ftype="fasta"/>
            <output name="clean_fasta" decompress="true" file="output.clean.fa.gz" ftype="fasta"/>
        </test>
    </tests>
    <help><![CDATA[
FCS-GX detects contamination from foreign organisms in genome sequences using the genome cross-species aligner (GX). The FCS-GX executable retrieves a Docker or Singularity container and runs a pipeline to align sequences to a large database of NCBI genomes through modified k-mer seeds and assign a most likely taxonomic division. FCS-GX classifies sequences as contaminant when their taxonomic assignment is different from the user provided taxonomic identifier. A contamination summary provides an overview of observed contaminant divisions, counts, and total sizes, and an action report provides details and recommended actions for each problematic sequence.

https://github.com/ncbi/fcs/wiki/FCS-GX
    ]]></help>
    <expand macro="citations"/>
</tool>