eukcc_single: eukcc_single.xml comparison

comparison eukcc_single.xml @ 0:65d952c59d8b draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/eukcc commit ea26eabce05391af21e0919ac5309d23396960e3

author	ufz
date	Fri, 25 Jul 2025 10:54:22 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:65d952c59d8b
+<tool id="eukcc_single" name="EukCC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0" license="MIT">
+<!-- The @TOOL_VERSION@ and @VERSION_SUFFIX@ tokens in the version attribute are not
+     defined in this file; presumably they come from the imported macros.xml (confirm). -->
+<description>estimate completeness and contamination of a novel eukaryotic MAG</description>
+<!-- macros.xml is the only macro file imported here, so the "requirements" and
+     "version_command" macros expanded below are expected to be defined there. -->
+<macros>
+<import>macros.xml</import>
+</macros>
+<!-- Cross reference to the bio.tools registry entry of the wrapped software. -->
+<xrefs>
+<xref type="bio.tools">eukcc</xref>
+</xrefs>
+<expand macro="requirements"/>
+<expand macro="version_command"/>
+<command detect_errors="exit_code"><![CDATA[
+## Link the input under a sanitized copy of its element identifier: every
+## character that is not a word character, '-' or '.' becomes '_'. A leading
+## '-' is replaced as well, so the name can never be parsed as an option by
+## ln or eukcc.
+#import re
+#set $identifier = re.sub(r'[^\w\-.]', '_', $fasta.element_identifier)
+#set $identifier = re.sub(r'^-', '_', $identifier)
+ln -s '$fasta' '$identifier' &&
+mkdir output/ &&
+eukcc single
+--out output/
+--db '$db.fields.path'
+--threads "\${GALAXY_SLOTS:-1}"
+## --threads_epa THREADS_EPA
+##     Number of threads to use for epa-ng, recommended: 1 (Default: 1)
+'$identifier'
+$sequence_type
+## The taxids validator also accepts input made of blanks only; treat that as
+## "not set" so that --taxids is never emitted without a value.
+#if str($advanced.taxids).strip() != ""
+--taxids $advanced.taxids
+#end if
+## --genomes takes every selected dataset as one list of quoted paths.
+#if $advanced.genomes
+--genomes
+#for $genome in $advanced.genomes
+'$genome'
+#end for
+#end if
+--set_size $advanced.set_size
+#if $advanced.use_placement
+--use_placement '$advanced.use_placement'
+#end if
+--set_number_species $advanced.set_number_species
+--marker_prevalence $advanced.marker_prevalence
+--max_set_size $advanced.max_set_size
+## The select and boolean parameters below carry the complete flag (or an
+## empty string) as their value, so they are inserted verbatim.
+$advanced.marker_gene_selection
+$advanced.use_ncbi_tree
+## --gmes                Use GeneMark-ES instead of metaeuk (much slower) (default: False)
+## --ignore_tree         Advanced option, mainly for debugging. Can ignore the tree if genomes are known via taxids for example
+$advanced.simple
+--clade $advanced.clade
+## --rerun, -r           Rerun and remove any previously computed data in the target folder
+$advanced.no_dynamic_root
+$advanced.extra
+## remove header and path to job working dir from output
+&& tail -n +2 output/eukcc.csv | sed "s|\$(pwd)/\?||"  > '$eukcc'
+## The marker table is only collected when extra outputs were requested; it is
+## decompressed and its header line is dropped.
+#if $advanced.extra
+&& gzip -d -c output/scmg_marker_table.csv.gz | tail -n +2 > '$scmg_marker_table'
+#end if
+]]></command>
+<inputs>
+    <!-- Bin to assess. -->
+    <param name="fasta"
+           type="data"
+           format="fasta"
+           label="A single bin"
+           help="Estimate quality of this bin"/>
+
+    <!-- Reference database, selected from the "eukcc" data table. -->
+    <param argument="--db" type="select" label="Reference data">
+        <options from_data_table="eukcc">
+            <validator type="no_options"
+                       message="Built-in reference is not available. Contact the Galaxy Admin"/>
+        </options>
+    </param>
+
+    <!-- Each option value is the literal command line flag; "Auto" adds nothing. -->
+    <param name="sequence_type" type="select" label="Sequence type">
+        <option value="">Auto</option>
+        <option value="--DNA">DNA</option>
+        <option value="--AA">AA</option>
+    </param>
+
+    <section name="advanced" title="Advanced options" expanded="false">
+        <!-- Free text restricted to digits and blanks by the regex validator. -->
+        <param argument="--taxids" type="text" label="Taxids to use as set starting point">
+            <validator type="regex" message="Must be a space separated list of tax IDs">^[0-9 ]*$</validator>
+        </param>
+
+        <param argument="--genomes"
+               type="data"
+               format="fasta"
+               multiple="true"
+               optional="true"
+               label="Genome files to base a SCMG set upon"/>
+
+        <param argument="--set_size"
+               type="integer"
+               value="20"
+               min="0"
+               label="Minimal number of marker genes to use"
+               help=""/>
+
+        <param argument="--use_placement"
+               type="data"
+               format="csv"
+               optional="true"
+               label="Previous result"
+               help="to use exact same marker gene set"/>
+
+        <param argument="--set_number_species"
+               type="integer"
+               value="3"
+               min="1"
+               label="Minimal number of species to define a set"
+               help=""/>
+
+        <param argument="--marker_prevalence"
+               type="float"
+               value="95"
+               min="0"
+               max="100"
+               label="Percentage of species in which markers should be found"
+               help=""/>
+
+        <param argument="--max_set_size"
+               type="integer"
+               value="500"
+               min="0"
+               label="Maximal number of marker genes used"
+               help="set to 0 to include all possible marker genes"/>
+
+        <!-- The chosen option value is the flag itself. -->
+        <param name="marker_gene_selection"
+               type="select"
+               label="Marker gene selection method"
+               help="">
+            <option value="--select_best_guess">Use best guess to select marker gene set</option>
+            <option value="--select_species">Use species count to select best marker gene set</option>
+        </param>
+
+        <param argument="--use_ncbi_tree"
+               type="boolean"
+               checked="false"
+               truevalue="--use_ncbi_tree"
+               falsevalue=""
+               label="Use NCBI tree"
+               help="Instead of using the EukCC phylogenetic tree, rely on NCBI taxids"/>
+
+        <param argument="--simple"
+               type="boolean"
+               checked="false"
+               truevalue="--simple"
+               falsevalue=""
+               label="Use global DB instead of clade specific DBs"
+               help="faster, not suitable for protozoa"/>
+
+        <param argument="--clade" type="select" label="Define clade as base">
+            <option value="base">Root</option>
+            <option value="fungi">Fungi</option>
+            <option value="protozoa">Protozoa</option>
+            <option value="plants">Plants</option>
+        </param>
+
+        <!-- Inverted switch: the flag is the falsevalue, so it is emitted while the box is
+             unchecked. NOTE(review): the box is unchecked by default, i.e. the
+             no_dynamic_root flag is passed by default; confirm this is intended. -->
+        <param argument="--no_dynamic_root"
+               type="boolean"
+               checked="false"
+               truevalue=""
+               falsevalue="--no_dynamic_root"
+               label="re-root tree dynamically"
+               help="Disable for best set detection"/>
+
+        <!-- Also controls whether the SCMG marker table output is created. -->
+        <param argument="--extra"
+               type="boolean"
+               checked="false"
+               truevalue="--extra"
+               falsevalue=""
+               label="Produce extra outputs"/>
+    </section>
+</inputs>
+<outputs>
+    <!-- Main result table. Column names are attached as metadata. -->
+    <data name="eukcc" format="tabular">
+        <actions>
+            <action type="metadata"
+                    name="column_names"
+                    default="fasta,completeness,contamination,ncbi_lng"/>
+        </actions>
+    </data>
+
+    <!-- Marker table; only present when the "extra" switch of the advanced section is on. -->
+    <data name="scmg_marker_table"
+          format="tabular"
+          label="${tool.name} on ${on_string}: SCMG marker table">
+        <filter>advanced['extra']</filter>
+        <actions>
+            <action type="metadata"
+                    name="column_names"
+                    default="target,query,bitscore,evalue,expected_GA"/>
+        </actions>
+    </data>
+</outputs>
+<tests>
+<!-- Reference data is too large for tests in CI. Download it locally with test-data.sh to run the tests.
+<test expect_num_outputs="1">
+<param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/>
+<param name="db" value="1.2"/>
+<output name="eukcc">
+<assert_contents>
+<has_text text="GCA_903798045.1"/>
+<has_text text="41874"/> <!\-\- 41874 = Bathycoccus \-\->
+<has_n_lines n="1"/>
+<has_n_columns n="4"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2">
+<param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/>
+<param name="db" value="1.2"/>
+<section name="advanced">
+<param name="extra" value="true"/>
+</section>
+<output name="eukcc">
+<assert_contents>
+<has_text text="GCA_903798045.1"/>
+<has_n_lines n="1"/>
+<has_n_columns n="4"/>
+</assert_contents>
+</output>
+<output name="scmg_marker_table">
+<assert_contents>
+<has_n_lines n="314"/>
+<has_n_columns n="5"/>
+</assert_contents>
+</output>
+</test> -->
+</tests>
+<help><![CDATA[
+.. class:: infomark
+
+**What it does**
+It consumes bins in FASTA format and outputs a table with estimated completeness, contamination and taxonomy lineage (given as dash separated list of TaxIDs).
+You should not use EukCC on already published genomes if they have been used during training of the marker gene sets.
+If you want to make sure, you can see all used accessions in the database file db_base/backbone/base_taxinfo.csv.
+]]></help>
+<!-- Publication to cite when reporting results produced with this tool. -->
+<citations>
+<citation type="doi">10.1186/s13059-020-02155-4</citation>
+</citations>
+</tool>

Mercurial > repos > ufz > eukcc_single

comparison eukcc_single.xml @ 0:65d952c59d8b draft default tip