Mercurial > repos > ufz > eukcc_single
comparison eukcc_single.xml @ 0:65d952c59d8b draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/eukcc commit ea26eabce05391af21e0919ac5309d23396960e3
| author | ufz |
|---|---|
| date | Fri, 25 Jul 2025 10:54:22 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:65d952c59d8b |
|---|---|
| 1 <tool id="eukcc_single" name="EukCC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0" license="MIT"> | |
| 2 <description>estimate completeness and contamination of a novel eukaryotic MAG</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <xrefs> | |
| 7 <xref type="bio.tools">eukcc</xref> | |
| 8 </xrefs> | |
| 9 <expand macro="requirements"/> | |
| 10 <expand macro="version_command"/> | |
| 11 <command detect_errors="exit_code"><![CDATA[ | |
| 12 #import re | |
| 13 #set $identifier= re.sub(r'[^\w\-.]', '_', $fasta.element_identifier) | |
| 14 ln -s '$fasta' '$identifier' && | |
| 15 mkdir output/ && | |
| 16 eukcc single | |
| 17 --out output/ | |
| 18 --db '$db.fields.path' | |
| 19 --threads "\${GALAXY_SLOTS:-1}" | |
| 20 ## --threads_epa THREADS_EPA | |
| 21 ## Number of threads to use for epa-ng, recommended: 1 (Default: 1) | |
| 22 '$identifier' | |
| 23 $sequence_type | |
| 24 #if str($advanced.taxids) != "" | |
| 25 --taxids $advanced.taxids | |
| 26 #end if | |
| 27 #if $advanced.genomes | |
| 28 --genomes | |
| 29 #for $genome in $advanced.genomes | |
| 30 '$genome' | |
| 31 #end for | |
| 32 #end if | |
| 33 --set_size $advanced.set_size | |
| 34 #if $advanced.use_placement | |
| 35 --use_placement '$advanced.use_placement' | |
| 36 #end if | |
| 37 --set_number_species $advanced.set_number_species | |
| 38 --marker_prevalence $advanced.marker_prevalence | |
| 39 --max_set_size $advanced.max_set_size | |
| 40 $advanced.marker_gene_selection | |
| 41 $advanced.use_ncbi_tree | |
| 42 ## --gmes Use GeneMark-ES instead of metaeuk (much slower) (default: False) | |
| 43 ## --ignore_tree Advanced option, mainly for debugging. Can ignore the tree if genomes are known via taxids, for example | |
| 44 $advanced.simple | |
| 45 --clade $advanced.clade | |
| 46 ## --rerun, -r Rerun and remove any previously computed data in the target folder | |
| 47 $advanced.no_dynamic_root | |
| 48 $advanced.extra | |
| 49 ## remove header and path to job working dir from output | |
| 50 && tail -n +2 output/eukcc.csv | sed "s|\$(pwd)/\?||" > '$eukcc' | |
| 51 #if $advanced.extra | |
| 52 && gzip -d -c output/scmg_marker_table.csv.gz | tail -n +2 > '$scmg_marker_table' | |
| 53 #end if | |
| 54 ]]></command> | |
| 55 <inputs> | |
| 56 <param name="fasta" type="data" format="fasta" label="A single bin" help="Estimate quality of this bin"/> | |
| 57 <param argument="--db" type="select" label="Reference data"> | |
| 58 <options from_data_table="eukcc"> | |
| 59 <validator type="no_options" message="Built-in reference is not available. Contact the Galaxy Admin" /> | |
| 60 </options> | |
| 61 </param> | |
| 62 <param name="sequence_type" type="select" label="Sequence type"> | |
| 63 <option value="">Auto</option> | |
| 64 <option value="--DNA">DNA</option> | |
| 65 <option value="--AA">AA</option> | |
| 66 </param> | |
| 67 <section name="advanced" title="Advanced options" expanded="false"> | |
| 68 <param argument="--taxids" type="text" label="Taxids to use as set starting point"> | |
| 69 <validator type="regex" message="Must be a space separated list of tax IDs">^[0-9 ]*$</validator> | |
| 70 </param> | |
| 71 <param argument="--genomes" type="data" format="fasta" optional="true" multiple="true" label="Genome files to base a SCMG set upon"/> | |
| 72 <param argument="--set_size" type="integer" min="0" value="20" label="Minimal number of marker genes to use" help="" /> | |
| 73 <param argument="--use_placement" type="data" format="csv" optional="true" label="Previous result" help="to use exact same marker gene set" /> | |
| 74 <param argument="--set_number_species" type="integer" min="1" value="3" label="Minimal number of species to define a set" help="" /> | |
| 75 <param argument="--marker_prevalence" type="float" min="0" max="100" value="95" label="Percentage of species in which markers should be found" help="" /> | |
| 76 <param argument="--max_set_size" type="integer" min="0" value="500" label="Maximal number of marker genes used" help="set to 0 to include all possible marker genes" /> | |
| 77 <param name="marker_gene_selection" type="select" label="Marker gene selection method" help=""> | |
| 78 <option value="--select_best_guess">Use best guess to select marker gene set</option> | |
| 79 <option value="--select_species">Use species count to select best marker gene set</option> | |
| 80 </param> | |
| 81 <param argument="--use_ncbi_tree" type="boolean" truevalue="--use_ncbi_tree" falsevalue="" checked="false" label="Use NCBI tree" help="Instead of using the EukCC phylogenetic tree, rely on NCBI taxids" /> | |
| 82 <param argument="--simple" type="boolean" truevalue="--simple" falsevalue="" checked="false" label="Use global DB instead of clade specific DBs" help="faster, not suitable for protozoa" /> | |
| 83 <param argument="--clade" type="select" label="Define clade as base"> | |
| 84 <option value="base">Root</option> | |
| 85 <option value="fungi">Fungi</option> | |
| 86 <option value="protozoa">Protozoa</option> | |
| 87 <option value="plants">Plants</option> | |
| 88 </param> | |
| 89 <param argument="--no_dynamic_root" type="boolean" truevalue="" falsevalue="--no_dynamic_root" checked="false" label="re-root tree dynamically" help="Disable for best set detection" /> | |
| 90 <param argument="--extra" type="boolean" truevalue="--extra" falsevalue="" checked="false" label="Produce extra outputs" /> | |
| 91 </section> | |
| 92 </inputs> | |
| 93 <outputs> | |
| 94 <data name="eukcc" format="tabular"> | |
| 95 <actions> | |
| 96 <action type="metadata" name="column_names" default="fasta,completeness,contamination,ncbi_lng"/> | |
| 97 </actions> | |
| 98 </data> | |
| 99 <data name="scmg_marker_table" format="tabular" label="${tool.name} on ${on_string}: SCMG marker table"> | |
| 100 <filter>advanced['extra']</filter> | |
| 101 <actions> | |
| 102 <action type="metadata" name="column_names" default="target,query,bitscore,evalue,expected_GA"/> | |
| 103 </actions> | |
| 104 </data> | |
| 105 </outputs> | |
| 106 <tests> | |
| 107 <!-- reference data too large for tests in CI. Download locally with test-data.sh to run tests. | |
| 108 <test expect_num_outputs="1"> | |
| 109 <param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/> | |
| 110 <param name="db" value="1.2"/> | |
| 111 <output name="eukcc"> | |
| 112 <assert_contents> | |
| 113 <has_text text="GCA_903798045.1"/> | |
| 114 <has_text text="41874"/> <!\-\- 41874 = Bathycoccus \-\-> | |
| 115 <has_n_lines n="1"/> | |
| 116 <has_n_columns n="4"/> | |
| 117 </assert_contents> | |
| 118 </output> | |
| 119 </test> | |
| 120 <test expect_num_outputs="2"> | |
| 121 <param name="fasta" value="10000_lines_GCA_903798045.1_TARA_EukCC_1_genomic.fna"/> | |
| 122 <param name="db" value="1.2"/> | |
| 123 <section name="advanced"> | |
| 124 <param name="extra" value="true"/> | |
| 125 </section> | |
| 126 <output name="eukcc"> | |
| 127 <assert_contents> | |
| 128 <has_text text="GCA_903798045.1"/> | |
| 129 <has_n_lines n="1"/> | |
| 130 <has_n_columns n="4"/> | |
| 131 </assert_contents> | |
| 132 </output> | |
| 133 <output name="scmg_marker_table"> | |
| 134 <assert_contents> | |
| 135 <has_n_lines n="314"/> | |
| 136 <has_n_columns n="5"/> | |
| 137 </assert_contents> | |
| 138 </output> | |
| 139 </test> --> | |
| 140 </tests> | |
| 141 <help><![CDATA[ | |
| 142 | |
| 143 .. class:: infomark | |
| 144 | |
| 145 **What it does** | |
| 146 | |
| 147 It consumes bins in FASTA format and outputs a table with estimated completeness, contamination and taxonomy lineage (given as dash separated list of TaxIDs). | |
| 148 | |
| 149 You should not use EukCC on already published genomes if they have been used during training of the marker gene sets. | |
| 150 If you want to make sure, you can see all used accessions in the database file db_base/backbone/base_taxinfo.csv. | |
| 151 | |
| 152 ]]></help> | |
| 153 <citations> | |
| 154 <citation type="doi">10.1186/s13059-020-02155-4</citation> | |
| 155 </citations> | |
| 156 </tool> |
