comparison eukcc_single.xml @ 0:65d952c59d8b draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/eukcc commit ea26eabce05391af21e0919ac5309d23396960e3
author ufz
date Fri, 25 Jul 2025 10:54:22 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:65d952c59d8b
1 <tool id="eukcc_single" name="EukCC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0" license="MIT">
2 <description>estimate completeness and contamination of a novel eukaryotic MAG</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <xrefs>
7 <xref type="bio.tools">eukcc</xref>
8 </xrefs>
9 <expand macro="requirements"/>
10 <expand macro="version_command"/>
11 <command detect_errors="exit_code"><![CDATA[
12 #import re
13 ## Build a file name from the element identifier: unsafe characters become '_', and so does a leading '-' so that ln cannot mistake the name for an option.
14 #set $identifier = re.sub(r'^-|[^\w\-.]', '_', $fasta.element_identifier)
15 ln -s '$fasta' '$identifier' &&
16 mkdir output/ &&
17 eukcc single
18 --out output/
19 --db '$db.fields.path'
20 --threads "\${GALAXY_SLOTS:-1}"
21 ## not exposed: --threads_epa THREADS_EPA Number of threads to use for epa-ng, recommended: 1 (Default: 1)
22 '$identifier'
23 $sequence_type
24 ## Skip --taxids when the field is empty or holds only spaces (the validator accepts blanks), so the flag is never emitted without a value.
25 #if str($advanced.taxids).strip() != ""
26 --taxids $advanced.taxids
27 #end if
28 #if $advanced.genomes
29 --genomes
30 #for $genome in $advanced.genomes
31 '$genome'
32 #end for
33 #end if
34 --set_size $advanced.set_size
35 #if $advanced.use_placement
36 --use_placement '$advanced.use_placement'
37 #end if
38 --set_number_species $advanced.set_number_species
39 --marker_prevalence $advanced.marker_prevalence
40 --max_set_size $advanced.max_set_size
41 $advanced.marker_gene_selection
42 $advanced.use_ncbi_tree
43 ## not exposed: --gmes (use GeneMark-ES instead of metaeuk, much slower; default: False); --ignore_tree (advanced option, mainly for debugging; can ignore the tree if genomes are known via taxids for example)
44 $advanced.simple
45 --clade $advanced.clade
46 ## not exposed: --rerun, -r (rerun and remove any previously computed data in the target folder)
47 $advanced.no_dynamic_root
48 $advanced.extra
49 ## remove header and path to job working dir from output
50 && tail -n +2 output/eukcc.csv | sed "s|\$(pwd)/\?||" > '$eukcc'
51 #if $advanced.extra
52 && gzip -d -c output/scmg_marker_table.csv.gz | tail -n +2 > '$scmg_marker_table'
53 #end if
54 ]]></command>
55 <inputs> <!-- top-level parameters first; the remaining EukCC switches live in the collapsed "advanced" section -->
56 <param name="fasta" type="data" label="A single bin" help="Estimate quality of this bin" format="fasta"/>
57 <param argument="--db" type="select" label="Reference data">
58 <options from_data_table="eukcc">
59 <validator message="Built-in reference is not available. Contact the Galaxy Admin" type="no_options"/>
60 </options>
61 </param>
62 <param name="sequence_type" type="select" label="Sequence type">
63 <option value="">Auto</option>
64 <option value="--DNA">DNA</option>
65 <option value="--AA">AA</option>
66 </param>
67 <section name="advanced" expanded="false" title="Advanced options"> <!-- read in the command template as $advanced.NAME -->
68 <param argument="--taxids" type="text" label="Taxids to use as set starting point">
69 <validator message="Must be a space separated list of tax IDs" type="regex">^[0-9 ]*$</validator>
70 </param>
71 <param argument="--genomes" type="data" label="Genome files to base a SCMG set upon" format="fasta" multiple="true" optional="true"/>
72 <param argument="--set_size" type="integer" label="Minimal number of marker genes to use" help="" value="20" min="0"/>
73 <param argument="--use_placement" type="data" label="Previous result" help="to use exact same marker gene set" format="csv" optional="true"/>
74 <param argument="--set_number_species" type="integer" label="Minimal number of species to define a set" help="" value="3" min="1"/>
75 <param argument="--marker_prevalence" type="float" label="Percentage of species in which markers should be found" help="" value="95" min="0" max="100"/>
76 <param argument="--max_set_size" type="integer" label="Maximal number of marker genes used" help="set to 0 to include all possible marker genes" value="500" min="0"/>
77 <param name="marker_gene_selection" type="select" label="Marker gene selection method" help="">
78 <option value="--select_best_guess">Use best guess to select marker gene set</option>
79 <option value="--select_species">Use species count to select best marker gene set</option>
80 </param>
81 <param argument="--use_ncbi_tree" type="boolean" label="Use NCBI tree" help="Instead of using the EukCC phylogenetic tree, rely on NCBI taxids" checked="false" truevalue="--use_ncbi_tree" falsevalue=""/>
82 <param argument="--simple" type="boolean" label="Use global DB instead of clade specific DBs" help="faster, not suitable for protozoa" checked="false" truevalue="--simple" falsevalue=""/>
83 <param argument="--clade" type="select" label="Define clade as base">
84 <option value="base">Root</option>
85 <option value="fungi">Fungi</option>
86 <option value="protozoa">Protozoa</option>
87 <option value="plants">Plants</option>
88 </param>
89 <param argument="--no_dynamic_root" type="boolean" label="re-root tree dynamically" help="Disable for best set detection" checked="false" truevalue="" falsevalue="--no_dynamic_root"/> <!-- NOTE(review): inverted switch, unchecked by default, so the no_dynamic_root flag is emitted unless the box is ticked; confirm this default is intended -->
90 <param argument="--extra" type="boolean" label="Produce extra outputs" checked="false" truevalue="--extra" falsevalue=""/>
91 </section>
92 </inputs>
93 <outputs> <!-- the command strips the header row from both tables (tail -n +2); column names are attached as metadata below -->
94 <data format="tabular" name="eukcc">
95 <actions>
96 <action name="column_names" type="metadata" default="fasta,completeness,contamination,ncbi_lng"/>
97 </actions>
98 </data>
99 <data format="tabular" name="scmg_marker_table" label="${tool.name} on ${on_string}: SCMG marker table">
100 <filter>advanced['extra']</filter> <!-- only created when "Produce extra outputs" is enabled -->
101 <actions>
102 <action name="column_names" type="metadata" default="target,query,bitscore,evalue,expected_GA"/>
103 </actions>
104 </data>
105 </outputs>
106 <tests>
107 <!-- Tests are disabled: the reference data is too large for CI. Download it locally with test-data.sh to run them.
108 <test expect_num_outputs="1">
109 <param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/>
110 <param name="db" value="1.2"/>
111 <output name="eukcc">
112 <assert_contents>
113 <has_text text="GCA_903798045.1"/>
114 <has_text text="41874"/> <!\-\- 41874 = Bathycoccus \-\->
115 <has_n_lines n="1"/>
116 <has_n_columns n="4"/>
117 </assert_contents>
118 </output>
119 </test>
120 <test expect_num_outputs="2">
121 <param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/>
122 <param name="db" value="1.2"/>
123 <section name="advanced">
124 <param name="extra" value="true"/>
125 </section>
126 <output name="eukcc">
127 <assert_contents>
128 <has_text text="GCA_903798045.1"/>
129 <has_n_lines n="1"/>
130 <has_n_columns n="4"/>
131 </assert_contents>
132 </output>
133 <output name="scmg_marker_table">
134 <assert_contents>
135 <has_n_lines n="314"/>
136 <has_n_columns n="5"/>
137 </assert_contents>
138 </output>
139 </test> -->
140 </tests>
141 <help><![CDATA[
142
143 .. class:: infomark
144
145 **What it does**
146
147 It consumes bins in FASTA format and outputs a table with estimated completeness, contamination and taxonomy lineage (given as dash separated list of TaxIDs).
148
149 You should not use EukCC on already published genomes if they have been used during training of the marker gene sets.
150 If you want to make sure, you can see all used accessions in the database file db_base/backbone/base_taxinfo.csv.
151
152 ]]></help>
153 <citations>
154 <citation type="doi">10.1186/s13059-020-02155-4</citation>
155 </citations>
156 </tool>