Mercurial > repos > iuc > ncbi_fcs_gx
comparison ncbi_fcs_gx.xml @ 0:3cdb96f2855d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_fcs_gx commit 4a6561ed00e004260be3f3c29d81e814c60e20af
author | iuc |
---|---|
date | Fri, 12 Jan 2024 22:11:39 +0000 |
parents | |
children | 49f8eae39606 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3cdb96f2855d |
---|---|
1 <tool id="ncbi_fcs_gx" name="NCBI FCS GX" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>detects contamination from foreign organisms in genome sequences</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="edam_ontology"/> | |
7 <expand macro="requirements"/> | |
<command detect_errors="exit_code"><![CDATA[
    ## Galaxy joins the rendered lines into a single shell command line, so the
    ## core count has to be exported and chained with '&&'. A bare VAR=value
    ## prefix would only reach the first command that follows it (mkdir in
    ## screen mode) and never the GX run itself.
    export GX_NUM_CORES=\${GALAXY_SLOTS:-2} &&
    #if $mode.mode_selector == "screen"
        ## copy data to local storage
        ## The manifest defaults to the "name" field of the selected database
        ## location and is replaced by "source_manifest" when the selected
        ## configuration asks for it.
        #set manifest_pathname = $mode.screen_adv.database.fields.name
        #if $mode.config_tag.fields.use_source_manifest == "1"
            #set manifest_pathname = $mode.screen_adv.database.fields.source_manifest
        #end if
        mkdir -p '$mode.config_tag.fields.node_cache_dir' &&
        sync_files.py get --mft '$manifest_pathname' --dir '$mode.config_tag.fields.node_cache_dir' > /dev/null 2>&1 &&
        ## run gx
        run_gx.py
        #if $mode.config_tag.fields.phone_home == "1"
            --phone-home-label '$mode.config_tag.fields.phone_home_label'
        #end if
        --fasta '$mode.fasta'
        ## A GX division is passed together with the fixed tax-id 1;
        ## otherwise the user-supplied NCBI tax-id is used.
        #if $mode.id.id_selector == "gx_div"
            --tax-id '1'
            --div '$mode.id.div'
        #else
            --tax-id '$mode.id.tax_id'
        #end if
        #if $mode.species != ""
            --species '$mode.species'
        #end if
        --split-fasta '$mode.screen_adv.split_fasta'
        ## Optional BLAST-div from the advanced section; emitted after any
        ## --div given above.
        #if $mode.screen_adv.div:
            --div '$mode.screen_adv.div'
        #end if
        --gx-db '$mode.config_tag.fields.node_cache_dir'
        --out-basename output
        --action-report true
        --generate-logfile false
    #elif $mode.mode_selector == "clean"
        ## run gx
        gx clean-genome
        --input '$mode.input'
        --action-report '$mode.action_report'
        --contam-fasta-out 'contam.fa'
        --min-seq-len '$mode.min_seq_len'
        --output 'clean.fa'
    #end if
]]></command>
<environment_variables>
    <!-- Value of the advanced "exclude taxa" field; falls back to an empty string when that variable is not defined. -->
    <environment_variable name="GX_ALIGN_EXCLUDE_TAXA"><![CDATA[$getVar('mode.screen_adv.gx_align_exclude_taxa', '')]]></environment_variable>
    <!-- Extra contaminant divisions joined with commas; falls back to an empty list when that variable is not defined. -->
    <environment_variable name="GX_EXTRA_CONTAM_DIVS"><![CDATA[#echo ','.join($getVar('mode.screen_adv.gx_extra_contam_divs', []))]]></environment_variable>
</environment_variables>
<inputs>
    <!-- Top-level switch: "screen" runs the contamination screen, "clean" applies an action report to a genome. -->
    <conditional name="mode">
        <param name="mode_selector" type="select" label="Choose the mode">
            <option value="screen" selected="true">Screen genome</option>
            <option value="clean">Clean genome</option>
        </param>
        <when value="screen">
            <!-- value, name, use_source_manifest, phone_home, phone_home_label, node_cache_dir -->
            <param name="config_tag" type="select" label="Database">
                <options from_data_table="ncbi_fcs_gx_config">
                    <filter type="sort_by" name="sorted_description" column="1"/>
                </options>
                <validator message="No database is available" type="no_options"/>
            </param>
            <param argument="--fasta" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
            <!-- The organism is identified either by a GX division or by an NCBI tax-id. -->
            <conditional name="id">
                <param name="id_selector" type="select" label="Taxonomy entry">
                    <option value="gx_div" selected="true">GX Division</option>
                    <option value="ncbi_tax">NCBI Taxonomic identifier</option>
                </param>
                <when value="gx_div">
                    <!-- Divisions are restricted to those matching the selected database configuration. -->
                    <param argument="--div" type="select" label="GX Division">
                        <options from_data_table="ncbi_fcs_gx_divisions">
                            <filter type="param_value" ref="config_tag" column="1" />
                            <filter type="sort_by" name="sorted_description" column="2" />
                        </options>
                        <validator message="No GX Divisions are available" type="no_options"/>
                    </param>
                </when>
                <when value="ncbi_tax">
                    <!-- https://www.ncbi.nlm.nih.gov/taxonomy -->
                    <param argument="--tax-id" type="text" label="Taxonomic identifier" help="The appropriate tax-id for your genome assembly. The appropriate tax-id for an organism can be retrieved from the NCBI Taxonomy website."/>
                </when>
            </conditional>
            <param argument="--species" type="text" optional="true" label="Species binomial name"/>
            <section name="screen_adv" title="Advanced options">
                <!-- comma separated list of taxa to ignore in GX_ALIGN_EXCLUDE_TAXA environment variable -->
                <param name="gx_align_exclude_taxa" type="text" value="" optional="true" label="Taxonomic identifier(s) to exclude" help="Multiple tax-ids may be provided as a comma-separated list.">
                    <validator type="regex" message="comma separated integers">^\s*\d+\s*(,\s*\d+\s*)*$</validator>
                    <!-- Only digits and commas are kept; every other character is removed. -->
                    <sanitizer invalid_char="">
                        <valid initial="string.digits">
                            <add value=","/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="gx_extra_contam_divs" type="select" multiple="true" optional="true" label="Additional contaminants to identify" help="Multiple gx-divisions may be selected.">
                    <options from_data_table="ncbi_fcs_gx_divisions">
                        <filter type="param_value" ref="config_tag" column="1" />
                        <filter type="sort_by" name="sorted_description" column="2" />
                    </options>
                    <validator message="No GX Divisions are available" type="no_options"/>
                </param>
                <param argument="--split-fasta" type="boolean" checked="true" optional="true" label="Split fasta sequences on N-runs of length at least 10"/>
                <param argument="--div" type="text" value="" optional="true" label="BLAST-div of the tax-id" help="from 'NCBI BLAST name' on taxon Info page"/>
                <!-- Database location rows are matched against the selected configuration tag. -->
                <param name="database" type="select" label="Database location">
                    <options from_data_table="ncbi_fcs_gx_databases">
                        <filter type="param_value" ref="config_tag" column="0"/>
                    </options>
                    <validator message="No database location is available" type="no_options"/>
                </param>
            </section>
        </when>
        <when value="clean">
            <param argument="--input" type="data" format="fasta" label="Input file (Fasta file)" help="To detect contamination from foreign organisms, a genome assembly in a fasta file."/>
            <param argument="--action-report" type="data" format="tabular" label="Select Action report"/>
            <param argument="--min-seq-len" type="integer" value="200" label="Minimum sequence length to keep"/>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Produced only when mode_selector is "screen" -->
    <data name="taxonomy_report"
          format="tabular"
          from_work_dir="output.taxonomy.rpt"
          label="${tool.name} on ${on_string}: Taxonomy report">
        <filter>mode['mode_selector'] == 'screen'</filter>
    </data>
    <data name="action_report"
          format="tabular"
          from_work_dir="output.fcs_gx_report.txt"
          label="${tool.name} on ${on_string}: Action report">
        <filter>mode['mode_selector'] == 'screen'</filter>
    </data>
    <!-- Produced only when mode_selector is "clean" -->
    <data name="contam_fasta"
          format="fasta"
          from_work_dir="contam.fa"
          label="${tool.name} on ${on_string}: Fasta for EXCLUDE entries">
        <filter>mode['mode_selector'] == 'clean'</filter>
    </data>
    <data name="clean_fasta"
          format="fasta"
          from_work_dir="clean.fa"
          metadata_source="mode.input"
          label="${tool.name} on ${on_string}: Cleaned Fasta">
        <filter>mode['mode_selector'] == 'clean'</filter>
    </data>
</outputs>
<tests>
    <!-- Screen mode: identify the organism by NCBI tax-id and compare both reports. -->
    <test expect_num_outputs="2">
        <param name="mode_selector" value="screen"/>
        <param name="config_tag" value="test-only" />
        <param name="id_selector" value="ncbi_tax"/>
        <param name="fasta" value="fcsgx_test.fa.gz" ftype="fasta"/>
        <param name="tax_id" value="6973"/>
        <output name="taxonomy_report" file="output.taxonomy.rpt" compare="diff" lines_diff="2" />
        <output name="action_report" file="output.fcs_gx_report.txt" compare="diff" lines_diff="2" />
    </test>
    <!-- Clean mode: only the inputs defined in the "clean" branch are set
         (id_selector belongs to the "screen" branch and is not used here). -->
    <test expect_num_outputs="2">
        <param name="mode_selector" value="clean"/>
        <param name="input" value="fcsgx_test.fa.gz" ftype="fasta"/>
        <param name="action_report" value="output.fcs_gx_report.txt" ftype="tabular"/>
        <output name="contam_fasta" decompress="true" file="output.contam.fa.gz" ftype="fasta" />
        <output name="clean_fasta" decompress="true" file="output.clean.fa.gz" ftype="fasta" />
    </test>
</tests>
159 <help><![CDATA[ | |
160 FCS-GX detects contamination from foreign organisms in genome sequences using the genome cross-species aligner (GX). The FCS-GX executable retrieves a Docker or Singularity container and runs a pipeline to align sequences to a large database of NCBI genomes through modified k-mer seeds and assign a most likely taxonomic division. | |
161 | |
162 FCS-GX classifies sequences as contaminant when their taxonomic assignment is different from the user provided taxonomic identifier. A contamination summary provides an overview of observed contaminant divisions, counts, and total sizes, and an action report provides details and recommended actions for each problematic sequence. | |
163 | |
164 https://github.com/ncbi/fcs/wiki/FCS-GX | |
165 ]]></help> | |
166 <expand macro="citations"/> | |
167 </tool> |