Mercurial > repos > iuc > ncbi_fcs_gx
changeset 0:3cdb96f2855d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_fcs_gx commit 4a6561ed00e004260be3f3c29d81e814c60e20af
author | iuc |
---|---|
date | Fri, 12 Jan 2024 22:11:39 +0000 |
parents | |
children | 49f8eae39606 |
files | macros.xml ncbi_fcs_gx.xml test-data/fcsgx_test.fa.gz test-data/ncbi_fcs_gx_config.tsv test-data/ncbi_fcs_gx_databases.loc test-data/ncbi_fcs_gx_divisions.tsv test-data/output.clean.fa.gz test-data/output.contam.fa.gz test-data/output.fcs_gx_report.txt test-data/output.taxonomy.rpt tool-data/ncbi_fcs_gx_config.tsv.sample tool-data/ncbi_fcs_gx_databases.loc.sample tool-data/ncbi_fcs_gx_divisions.tsv.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 15 files changed, 324 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Jan 12 22:11:39 2024 +0000
@@ -0,0 +1,26 @@
+<macros>
+    <!-- Tokens: wrapped tool version, wrapper revision suffix and Galaxy profile. -->
+    <token name="@TOOL_VERSION@">0.5.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <!-- Package requirement pinned to @TOOL_VERSION@; the yield lets a caller append more. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">ncbi-fcs-gx</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- EDAM operation annotation attached to the tool. -->
+    <xml name="edam_ontology">
+        <edam_operations>
+            <edam_operation>operation_3187</edam_operation>
+        </edam_operations>
+    </xml>
+    <!-- Citation by DOI; the yield lets a caller append further citations. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2023.06.02.543519</citation>
+            <yield/>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ncbi_fcs_gx.xml Fri Jan 12 22:11:39 2024 +0000
@@ -0,0 +1,174 @@
+<tool id="ncbi_fcs_gx" name="NCBI FCS GX" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>detects contamination from foreign organisms in genome sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <!-- Cheetah template for the job command; its two branches mirror the "mode" conditional in the inputs. -->
+    <command detect_errors="exit_code"><![CDATA[
+## Export the core count and chain it with && so that the variable is inherited by
+## every later command of the job, not just the first one that follows the assignment.
+export GX_NUM_CORES=\${GALAXY_SLOTS:-2} &&
+#if $mode.mode_selector == "screen"
+    ## copy data to local storage
+    ## default to the "name" column of the databases table; use "source_manifest" when the config row sets use_source_manifest to 1
+    #set manifest_pathname = $mode.screen_adv.database.fields.name
+    #if $mode.config_tag.fields.use_source_manifest == "1"
+        #set manifest_pathname = $mode.screen_adv.database.fields.source_manifest
+    #end if
+    mkdir -p '$mode.config_tag.fields.node_cache_dir' &&
+    sync_files.py get --mft '$manifest_pathname' --dir '$mode.config_tag.fields.node_cache_dir' > /dev/null 2>&1 &&
+    ## run gx
+    run_gx.py
+    #if $mode.config_tag.fields.phone_home == "1"
+        --phone-home-label '$mode.config_tag.fields.phone_home_label'
+    #end if
+    --fasta '$mode.fasta'
+    #if $mode.id.id_selector == "gx_div"
+        --tax-id '1'
+        --div '$mode.id.div'
+    #else
+        --tax-id '$mode.id.tax_id'
+    #end if
+    #if $mode.species
+        --species '$mode.species'
+    #end if
+    --split-fasta '$mode.screen_adv.split_fasta'
+    ## NOTE(review): with the GX Division selector above this can emit a second --div; confirm which one run_gx.py honours
+    #if $mode.screen_adv.div
+        --div '$mode.screen_adv.div'
+    #end if
+    --gx-db '$mode.config_tag.fields.node_cache_dir'
+    --out-basename output
+    --action-report true
+    --generate-logfile false
+#elif $mode.mode_selector == "clean"
+    ## run gx
+    gx clean-genome
+    --input '$mode.input'
+    --action-report '$mode.action_report'
+    --contam-fasta-out 'contam.fa'
+    --min-seq-len '$mode.min_seq_len'
+    --output 'clean.fa'
+#end if
+    ]]></command>
+    <!-- Advanced screen options passed through the environment; getVar supplies an empty default, presumably for clean mode where the screen_adv section does not exist. -->
+    <environment_variables>
+        <environment_variable name="GX_ALIGN_EXCLUDE_TAXA">$getVar('mode.screen_adv.gx_align_exclude_taxa', '')</environment_variable>
+        <environment_variable name="GX_EXTRA_CONTAM_DIVS"><![CDATA[#echo ','.join($getVar('mode.screen_adv.gx_extra_contam_divs', []))]]></environment_variable>
+    </environment_variables>
+    <inputs>
+        <conditional name="mode">
+            <param name="mode_selector" type="select" label="Choose the mode">
+                <option value="screen" selected="true">Screen genome</option>
+                <option value="clean">Clean genome</option>
+            </param>
+            <when value="screen">
+                <!-- value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir -->
+                <param name="config_tag" type="select" label="Database">
+                    <options from_data_table="ncbi_fcs_gx_config">
+                        <filter type="sort_by" name="sorted_description" column="1"/>
+                    </options>
+                    <validator message="No database is available" type="no_options"/>
+                </param>
+                <param argument="--fasta" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
+                <conditional name="id">
+                    <param name="id_selector" type="select" label="Taxonomy entry">
+                        <option value="gx_div" selected="true">GX Division</option>
+                        <option value="ncbi_tax">NCBI Taxonomic identifier</option>
+                    </param>
+                    <when value="gx_div">
+                        <param argument="--div" type="select">
+                            <options from_data_table="ncbi_fcs_gx_divisions">
+                                <filter type="param_value" ref="config_tag" column="1" />
+                                <filter type="sort_by" name="sorted_description" column="2" />
+                            </options>
+                            <validator message="No GX Divisions are available" type="no_options"/>
+                        </param>
+                    </when>
+                    <when value="ncbi_tax">
+                        <!-- https://www.ncbi.nlm.nih.gov/taxonomy -->
+                        <param argument="--tax-id" type="text" label="Taxonomic identifier" help="The appropriate tax-id for your genome assembly. The appropriate tax-id for an organism can be retrieved from the NCBI Taxonomy website."/>
+                    </when>
+                </conditional>
+                <param argument="--species" type="text" optional="true" label="Species binomial name"/>
+                <section name="screen_adv" title="Advanced options">
+                    <!-- comma separated list of taxa to ignore in GX_ALIGN_EXCLUDE_TAXA environment variable -->
+                    <param name="gx_align_exclude_taxa" type="text" value="" optional="true" label="Taxonomic identifier(s) to exclude" help="Multiple tax-ids may be provided as a comma-separated list.">
+                        <validator type="regex" message="comma separated integers">^\s*\d+\s*(,\s*\d+\s*)*$</validator>
+                        <sanitizer invalid_char="">
+                            <valid initial="string.digits">
+                                <add value=","/>
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param name="gx_extra_contam_divs" type="select" multiple="true" optional="true" label="Additional contaminants to identify" help="Multiple gx-divisions may be selected.">
+                        <options from_data_table="ncbi_fcs_gx_divisions">
+                            <filter type="param_value" ref="config_tag" column="1" />
+                            <filter type="sort_by" name="sorted_description" column="2" />
+                        </options>
+                        <validator message="No GX Divisions are available" type="no_options"/>
+                    </param>
+                    <param argument="--split-fasta" type="boolean" checked="true" optional="true" label="Split fasta sequences on N-runs of length at least 10"/>
+                    <param argument="--div" type="text" value="" optional="true" label="BLAST-div of the tax-id" help="from 'NCBI BLAST name' on taxon Info page"/>
+                    <param name="database" type="select" label="Database location">
+                        <options from_data_table="ncbi_fcs_gx_databases">
+                            <filter type="param_value" ref="config_tag" column="0"/>
+                        </options>
+                        <validator message="No database location is available" type="no_options"/>
+                    </param>
+                </section>
+            </when>
+            <when value="clean">
+                <param argument="--input" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
+                <param argument="--action-report" type="data" format="tabular" label="Select Action report"/>
+                <param argument="--min-seq-len" type="integer" value="200" label="Minimum sequence length to keep"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- mode == screen -->
+        <data name="taxonomy_report" format="tabular" from_work_dir="output.taxonomy.rpt" label="${tool.name} on ${on_string}: Taxonomy report">
+            <filter>mode['mode_selector'] == 'screen'</filter>
+        </data>
+        <data name="action_report" format="tabular" from_work_dir="output.fcs_gx_report.txt" label="${tool.name} on ${on_string}: Action report">
+            <filter>mode['mode_selector'] == 'screen'</filter>
+        </data>
+        <!-- mode == clean -->
+        <data name="contam_fasta" format="fasta" from_work_dir="contam.fa" label="${tool.name} on ${on_string}: Fasta for EXCLUDE entries">
+            <filter>mode['mode_selector'] == 'clean'</filter>
+        </data>
+        <data name="clean_fasta" format="fasta" from_work_dir="clean.fa" metadata_source="mode.input" label="${tool.name} on ${on_string}: Cleaned Fasta">
+            <filter>mode['mode_selector'] == 'clean'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- screen mode with an NCBI tax-id, using the "test-only" configuration row -->
+        <test expect_num_outputs="2">
+            <param name="mode_selector" value="screen"/>
+            <param name="config_tag" value="test-only" />
+            <param name="id_selector" value="ncbi_tax"/>
+            <param name="fasta" value="fcsgx_test.fa.gz" ftype="fasta"/>
+            <param name="tax_id" value="6973"/>
+            <output name="taxonomy_report" file="output.taxonomy.rpt" compare="diff" lines_diff="2" />
+            <output name="action_report" file="output.fcs_gx_report.txt" compare="diff" lines_diff="2" />
+        </test>
+        <!-- clean mode; only parameters of the "clean" branch are set -->
+        <test expect_num_outputs="2">
+            <param name="mode_selector" value="clean"/>
+            <param name="input" value="fcsgx_test.fa.gz" ftype="fasta"/>
+            <param name="action_report" value="output.fcs_gx_report.txt" ftype="tabular"/>
+            <output name="contam_fasta" decompress="true" file="output.contam.fa.gz" ftype="fasta" />
+            <output name="clean_fasta" decompress="true" file="output.clean.fa.gz" ftype="fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
+FCS-GX detects contamination from foreign organisms in genome sequences using the genome cross-species aligner (GX). The FCS-GX executable retrieves a Docker or Singularity container and runs a pipeline to align sequences to a large database of NCBI genomes through modified k-mer seeds and assign a most likely taxonomic division.
+
+FCS-GX classifies sequences as contaminant when their taxonomic assignment is different from the user provided taxonomic identifier. A contamination summary provides an overview of observed contaminant divisions, counts, and total sizes, and an action report provides details and recommended actions for each problematic sequence.
+
+https://github.com/ncbi/fcs/wiki/FCS-GX
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ncbi_fcs_gx_config.tsv Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,4 @@ +## NCBI FCS GX Tool Config +# +#tag description use_source_manifest phone_home phone_home_label node_cache_dir +test-only Testing GX database 1 0 /tmp/gxdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ncbi_fcs_gx_databases.loc Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,2 @@ +#tag source_manifest local_manifest +test-only https://ncbi-fcs-gx.s3.amazonaws.com/gxdb/test-only/test-only.manifest <UNUSED>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ncbi_fcs_gx_divisions.tsv Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,3 @@ +#gx_div tag description +prok:CFB group bacteria test-only Bacteria - CFB group bacteria +unkn:unknown test-only Unknown / Unclassified
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fcs_gx_report.txt Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,7 @@ +##[["FCS genome report", 2, 1], {"git-rev": "+branch--", "run-date": "Wed Jan 10 08:58:52 2024", "db": {"build-date": "2022-11-03", "seqs": 3042, "Gbp": 0.28336}, "run-info": {"agg-cvg": 0.358706, "asserted-div": "anml:insects", "inferred-primary-divs": ["prok:CFB group bacteria"], "corrected-primary-divs": ["anml:insects"]}}] +#seq_id start_pos end_pos seq_len action div agg_cont_cov top_tax_name +JPZV02005859.1 1 705930 705930 REVIEW prok:CFB group bacteria 22 Bacteroides xylanisolvens +JPZV02009577.1 1 600722 600722 EXCLUDE prok:CFB group bacteria 52 Bacteroides caecimuris +JPZV02016362.1 1 432265 432265 EXCLUDE prok:CFB group bacteria 45 Bacteroides salyersiae +JPZV02031416.1 1 170387 170387 REVIEW prok:CFB group bacteria 18 Bacteroides sp. CBA7301 +JPZV02034235.1 1 131158 131158 REVIEW prok:CFB group bacteria 24 Bacteroides faecichinchillae
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.taxonomy.rpt Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,8 @@ +##[["GX taxonomy analysis report", 3, 1], {"git-rev": "+branch--", "run-date": "Wed Jan 10 08:58:52 2024", "db": {"build-date": "2022-11-03", "seqs": 3042, "Gbp": 0.28336}, "run-info": {"agg-cvg": 0.358706, "asserted-div": "anml:insects", "inferred-primary-divs": ["prok:CFB group bacteria"], "corrected-primary-divs": ["anml:insects"]}}] +#seq-id seq-len (xp,lc,co,n,xc)-len cvg-by-all sep1 tax-name-1 tax-id-1 div-1 cvg-by-div-1 cvg-by-tax-1 score-1 sep2 tax-id-2 div-2 cvg-by-div-2 cvg-by-tax-2 score-2 sep3 tax-id-3 div-3 cvg-by-div-3 cvg-by-tax-3 score-3 sep4 tax-id-4 div-4 cvg-by-div-4 cvg-by-tax-4 score-4 sep5 reserved result div div_pct_cvg +JPZV02005859.1 705930 0,1139,0,0,0 159470 | Bacteroides xylanisolvens 371601 prok:CFB group bacteria 159470 49227 393 | 1796613 prok:CFB group bacteria 159470 48051 387 | | | n/a low-coverage prok:CFB group bacteria 23 +JPZV02009577.1 600722 0,65,0,0,0 316464 | Bacteroides caecimuris 1796613 prok:CFB group bacteria 316464 137645 782 | 371601 prok:CFB group bacteria 316464 120328 757 | | | n/a contaminant(div) prok:CFB group bacteria 53 +JPZV02016362.1 432265 0,0,0,0,0 197253 | Bacteroides salyersiae 291644 prok:CFB group bacteria 197253 74527 520 | 246787 prok:CFB group bacteria 197253 74971 512 | | | n/a contaminant(div) prok:CFB group bacteria 46 +JPZV02031416.1 170387 0,60,0,0,0 32395 | Bacteroides sp. 
CBA7301 2715212 prok:CFB group bacteria 32395 13706 205 | 1297750 prok:CFB group bacteria 32395 12253 201 | | | n/a low-coverage prok:CFB group bacteria 19 +JPZV02034235.1 131158 0,0,0,0,0 33221 | Bacteroides faecichinchillae 871325 prok:CFB group bacteria 33221 17243 230 | 1121098 prok:CFB group bacteria 33221 15138 222 | | | n/a low-coverage prok:CFB group bacteria 25 +JPZV02046037.1 26753 0,0,0,0,0 2720 | Bacteroides graminisolvens DSM 19988 = JCM 15093 1121097 prok:CFB group bacteria 2720 912 45 | 28111 prok:CFB group bacteria 2720 821 45 | | | n/a low-coverage prok:CFB group bacteria 10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ncbi_fcs_gx_config.tsv.sample Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,5 @@ +## NCBI FCS GX Tool Configuration +# +#tag description use_source_manifest phone_home phone_home_label node_cache_dir +#all Complete GX database 0 0 /tmp/gxdb +#test-only Testing GX database 0 1 usegalaxy.org /tmp/gxdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ncbi_fcs_gx_databases.loc.sample Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,8 @@ +## NCBI FCS GX Databases +# +#tag manifest path +#r2022-01-24 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2022-01-24/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2022-01-24/all.manifest +#r2022-07-08 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2022-07-08/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2022-07-08/all.manifest +#r2023-01-24 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2023-01-24/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2023-01-24/all.manifest +#latest https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/latest/all.manifest /big/data/dir/ncbi_fcs_gx_databases/latest/all.manifest +#test-only https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.manifest /big/data/dir/ncbi_fcs_gx_databases/test-only/test-only.manifest
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ncbi_fcs_gx_divisions.tsv.sample Fri Jan 12 22:11:39 2024 +0000 @@ -0,0 +1,64 @@ +#gx_div tag description +#anml:amphibians latest Animals (Metazoa) - amphibians +#anml:basal metazoans latest Animals (Metazoa) - basal metazoans +#anml:birds latest Animals (Metazoa) - birds +#anml:brachiopods latest Animals (Metazoa) - brachiopods +#anml:crustaceans latest Animals (Metazoa) - crustaceans +#anml:echinoderms latest Animals (Metazoa) - echinoderms +#anml:fishes latest Animals (Metazoa) - fishes +#anml:insects latest Animals (Metazoa) - insects +#anml:mammals latest Animals (Metazoa) - mammals +#anml:marsupials latest Animals (Metazoa) - marsupials +#anml:molluscs latest Animals (Metazoa) - molluscs +#anml:nematodes latest Animals (Metazoa) - nematodes +#anml:primates latest Animals (Metazoa) - primates +#anml:reptiles latest Animals (Metazoa) - reptiles +#anml:rodents latest Animals (Metazoa) - rodents +#anml:rotifers latest Animals (Metazoa) - rotifers +#anml:tardigrades latest Animals (Metazoa) - tardigrades +#anml:worms latest Animals (Metazoa) - worms +#arch:archaea latest Archaea - archaea +#prok:CFB group bacteria latest Bacteria - CFB group bacteria +#prok:GNS bacteria latest Bacteria - GNS bacteria +#prok:a-proteobacteria latest Bacteria - a-proteobacteria +#prok:actinobacteria latest Bacteria - actinobacteria +#prok:aquificales latest Bacteria - aquificales +#prok:b-proteobacteria latest Bacteria - b-proteobacteria +#prok:bacteria latest Bacteria - bacteria +#prok:chlamydias latest Bacteria - chlamydias +#prok:cyanobacteria latest Bacteria - cyanobacteria +#prok:d-proteobacteria latest Bacteria - d-proteobacteria +#prok:firmicutes latest Bacteria - firmicutes +#prok:fusobacteria latest Bacteria - fusobacteria +#prok:g-proteobacteria latest Bacteria - g-proteobacteria +#prok:green sulfur bacteria latest Bacteria - green sulfur bacteria +#prok:high GC Gram+ latest Bacteria - high GC Gram+ 
+#prok:mycoplasmas latest Bacteria - mycoplasmas +#prok:planctomycetes latest Bacteria - planctomycetes +#prok:proteobacteria latest Bacteria - proteobacteria +#prok:spirochetes latest Bacteria - spirochetes +#prok:thermotogales latest Bacteria - thermotogales +#prok:verrucomicrobia latest Bacteria - verrucomicrobia +#fung:ascomycetes latest Fungi - ascomycetes +#fung:basidiomycetes latest Fungi - basidiomycetes +#fung:budding yeasts latest Fungi - budding yeasts +#fung:chytrids latest Fungi - chytrids +#fung:fungi latest Fungi - fungi +#fung:microsporidians latest Fungi - microsporidians +#plnt:green algae latest Plants (Viridiplantae) - green algae +#plnt:mosses latest Plants (Viridiplantae) - mosses +#plnt:plants latest Plants (Viridiplantae) - plants +#prst:algae latest Protists (other Eukaryota) - algae +#prst:alveolates latest Protists (other Eukaryota) - alveolates +#prst:cellular slime molds latest Protists (other Eukaryota) - cellular slime molds +#prst:cercozoans latest Protists (other Eukaryota) - cercozoans +#prst:choanoflagellates latest Protists (other Eukaryota) - choanoflagellates +#prst:euglenoids latest Protists (other Eukaryota) - euglenoids +#prst:monads latest Protists (other Eukaryota) - monads +#prst:protists latest Protists (other Eukaryota) - protists +#prst:slime nets latest Protists (other Eukaryota) - slime nets +#synt:synthetic latest Synthetic - synthetic +#unkn:unknown latest Unknown / Unclassified +#virs:eukaryotic viruses latest Virus - eukaryotic viruses +#virs:prokaryotic viruses latest Virus - prokaryotic viruses +#virs:viruses latest Virus - viruses
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jan 12 22:11:39 2024 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- NCBI FCS GX tool configuration, one row per tag -->
+    <table comment_char="#" name="ncbi_fcs_gx_config">
+        <columns>value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir</columns>
+        <file path="tool-data/ncbi_fcs_gx_config.tsv"/>
+    </table>
+    <!-- NCBI FCS GX database manifests, one row per tag -->
+    <table comment_char="#" name="ncbi_fcs_gx_databases">
+        <columns>value, source_manifest, name</columns>
+        <file path="tool-data/ncbi_fcs_gx_databases.loc"/>
+    </table>
+    <!-- GX divisions listed per tag -->
+    <table comment_char="#" name="ncbi_fcs_gx_divisions">
+        <columns>value, tag, name</columns>
+        <file path="tool-data/ncbi_fcs_gx_divisions.tsv"/>
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Fri Jan 12 22:11:39 2024 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- NCBI FCS GX tool configuration used by the tool tests -->
+    <table comment_char="#" name="ncbi_fcs_gx_config">
+        <columns>value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir</columns>
+        <file path="${__HERE__}/test-data/ncbi_fcs_gx_config.tsv"/>
+    </table>
+    <!-- NCBI FCS GX database manifests used by the tool tests -->
+    <table comment_char="#" name="ncbi_fcs_gx_databases">
+        <columns>value, source_manifest, name</columns>
+        <file path="${__HERE__}/test-data/ncbi_fcs_gx_databases.loc"/>
+    </table>
+    <!-- GX divisions used by the tool tests -->
+    <table comment_char="#" name="ncbi_fcs_gx_divisions">
+        <columns>value, tag, name</columns>
+        <file path="${__HERE__}/test-data/ncbi_fcs_gx_divisions.tsv"/>
+    </table>
+</tables>