comparison biobox_add_taxid.xml @ 4:2e0af1e2d487 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/amber/ commit 09a7f1234811e6104671c0fbb51a515e13815041
author iuc
date Sat, 08 Feb 2025 11:33:03 +0000
parents 450a61fc097f
children
comparison
equal deleted inserted replaced
3:8d0b2def5e65 4:2e0af1e2d487
1 <tool id="biobox_add_taxid" name="Biobox add taxid" version="@SCRIPT_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="biobox_add_taxid" name="Biobox add taxid" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Add taxid output from BAT or GTDB to biobox binning data</description> 2 <description>Add taxid output from BAT or GTDB to biobox binning data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <token name="@TOOL_VERSION@">1.2</token>
5 <token name="@SCRIPT_VERSION@">0.6</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6 <token name="@PROFILE@">24.1</token>
6 </macros> 7 </macros>
7 <requirements> 8 <requirements>
8 <requirement type="package" version="@SCRIPT_VERSION@">biobox_add_taxid</requirement> 9 <requirement type="package" version="@TOOL_VERSION@">biobox_add_taxid</requirement>
9 </requirements> 10 </requirements>
10 <command detect_errors="exit_code"> 11 <command detect_errors="exit_code">
11 <![CDATA[ 12 <![CDATA[
12 13
13 mkdir -p input taxonkit gtdb_to_taxdump && 14 ln -s '$biobox_file' 'biobox.tsv' &&
14 15
15 ln -s '${biobox_file}' '$biobox_file.element_identifier' && 16 #if $input.is_select == 'contig':
17 ln -s '$contig2taxid' 'contig.tsv' &&
18 #else:
19 ln -s '$binid2taxid' 'bin.tsv' &&
20 #end if
16 21
17 #for $i, $f in enumerate($tool_type.input): 22 biobox_add_taxid.py
18 ln -s '$f' 'input/${i}.tsv' && 23 'biobox.tsv'
19 #end for 24 #if $input.is_select == 'contig':
25 -c 'contig.tsv'
26 #else:
27 -b 'bin.tsv'
28 #end if
29 -k_c ${key_col}
30 -t_c ${taxid_col}
20 31
21 #if $tool_type.is_select == 'GTDB': 32 &&
22 #for $i, $f in enumerate($gtdb_to_taxdump):
23 ln -s '$f' 'gtdb_to_taxdump/gtdb_to_taxdump_${i}.tsv' &&
24 #end for
25 #for $i, $f in enumerate($taxonkit):
26 ln -s '$f' 'taxonkit/taxonkit_${i}.tsv' &&
27 #end for
28 #end if
29 33
30 biobox_add_taxid.py 34 cp 'modified_biobox_file.tsv' '$output'
31 '$biobox_file.element_identifier'
32 '$tool_type.is_select'
33 'input'
34 #if $tool_type.is_select == 'GTDB':
35 -g 'gtdb_to_taxdump'
36 -t 'taxonkit'
37 -c $column
38 #end if
39 &&
40
41 #if $tool_type.is_select == 'GTDB':
42 cp *_add_taxid_GTDB* $output
43 #else:
44 cp *_add_taxid_BAT* $output
45 #end if
46 35
47 ]]> 36 ]]>
48 </command> 37 </command>
49 <inputs> 38 <inputs>
50 <param name="biobox_file" type="data" format="tabular" label="CAMI amber biobox file input" help="Input the CAMI amber biobox file here which are corespond with the biner and the BAT/GTDB output which did used the biner as input! "/> 39 <param name="biobox_file" type="data" format="tabular" label="Input biobox file"
51 <conditional name="tool_type"> 40 help="This file can be generated with the utility tool from CAMI AMBER named convert to biobox"/>
52 <param name="is_select" type="select" label="Select the tool which output should be used here" help="Select BAT when you use the bin2classifier file(s) or select GTDB when using the summary file(s)"> 41 <conditional name="input">
53 <option value="BAT">BAT</option> 42 <param name="is_select" type="select" label="Select type of input">
54 <option value="GTDB">GTDB</option> 43 <option value="contig">ContigID2TaxID</option>
44 <option value="bin">BinID2TaxID</option>
55 </param> 45 </param>
56 <when value="GTDB"> 46 <when value="contig">
57 <param argument="--gtdb_to_taxdump" type="data" multiple="true" format="tabular" label="Input the output from gtdb_to_taxdump here" help="Use the output from gtdb_to_taxdump here since we need the mapped names from GTDB to NCBI to get the arcoding taxids from NCBI"/> 47 <param argument="--contig2taxid" type="data" format="tabular" label="Input ContigID2TaxID file"
58 <param argument="--taxonkit" type="data" format="tabular" multiple="true" label="Input the output from Taxonkit here" help="Use the output from Taxonkit here since the need the mapped NCBI names to the arcording taxids"/> 48 help="This input comes from Kraken2"/>
59 <param argument="--column" type="data_column" data_ref="taxonkit" label="Input the colum with the NCBI names"/> 49 <param argument="--key_col" type="data_column" data_ref="contig2taxid" label="Select the column where the ContigID are stated"/>
60 <param name="input" type="data" multiple="true" format="tabular" label="Input the GTDB-Tk summary file(s) here" 50 <param argument="--taxid_col" type="data_column" data_ref="contig2taxid" label="Select the column where the TaxID are stated"/>
61 help="Use the GTDB-Tk file(s) which are coresponding with the binning file!"/>
62 </when> 51 </when>
63 <when value="BAT"> 52 <when value="bin">
64 <param name="input" type="data" multiple="true" format="tabular" label="Input bin2classifier file(s) from BAT here" 53 <param argument="--binid2taxid" type="data" format="tabular" label="Input BinID2TaxID file"
65 help="Use the BAT file(s) which are coresponding with the binning file!"/> 54 help="This input comes from Kraken2"/>
55 <param argument="--key_col" type="data_column" data_ref="binid2taxid" label="Select the column where the BinID are stated"/>
56 <param argument="--taxid_col" type="data_column" data_ref="binid2taxid" label="Select the column where the TaxID are stated"/>
66 </when> 57 </when>
67 </conditional> 58 </conditional>
68 </inputs> 59 </inputs>
69 <outputs> 60 <outputs>
70 <data name="output" format="tabular" label="${tool.name}: BIOBOX ADD TAXID COLUMN"/> 61 <data name="output" format="tabular" label="${tool.name}: BIOBOX ADD TAXID COLUMN"/>
71 </outputs> 62 </outputs>
72 <tests> 63 <tests>
73 <test expect_num_outputs="1"> 64 <test>
74 <param name="biobox_file" ftype="tabular" value="test_biobox_file.tsv"/> 65 <param name="biobox_file" value="biobox_file.tsv" ftype="tabular"/>
75 <conditional name="tool_type"> 66 <conditional name="input">
76 <param name="is_select" value="BAT"/> 67 <param name="is_select" value="bin"/>
77 <param name="input" ftype="tabular" value="test_bat_file.tsv"/> 68 <param name="binid2taxid" value="binid2taxid.tsv" ftype="tabular"/>
69 <param name="key_col" value="1"/>
70 <param name="taxid_col" value="2"/>
78 </conditional> 71 </conditional>
79 <output name="output" file="test_biobox_file_add_taxid_bat.tsv"/> 72 <output name="output" file="modified_biobox_file_bin.tsv"/>
80 </test> 73 </test>
81 <test expect_num_outputs="1"> 74 <test>
82 <param name="biobox_file" ftype="tabular" value="test_biobox_file.tsv"/> 75 <param name="biobox_file" value="biobox_file.tsv" ftype="tabular"/>
83 <conditional name="tool_type"> 76 <conditional name="input">
84 <param name="is_select" value="GTDB"/> 77 <param name="is_select" value="contig"/>
85 <param name="gtdb_to_taxdump" ftype="tabular" value="test_gtdb_to_taxdump_file.tsv"/> 78 <param name="contig2taxid" value="kraken2.tsv" ftype="tabular"/>
86 <param name="taxonkit" ftype="tabular" value="test_taxonkit_file_1.tsv,test_taxonkit_file_2.tsv"/> 79 <param name="key_col" value="2"/>
87 <param name="column" value="1"/> 80 <param name="taxid_col" value="3"/>
88 <param name="input" ftype="tabular" value="test_gtdb_file.tsv"/>
89 </conditional> 81 </conditional>
90 <output name="output" file="test_biobox_file_add_taxid_gtdb.tsv"/> 82 <output name="output" file="modified_biobox_file_contig.tsv"/>
91 </test> 83 </test>
92 </tests> 84 </tests>
93 <help> 85 <help>
94 <![CDATA[ 86 <![CDATA[
95 87
96 **USAGE OF THIS TOOL** 88 **USAGE OF THIS TOOL**
97 89
98 This tool was desgin for using the BAT/GTDB-Tk output to expand the CAMI amber biobox file with the taxid column to access taxa evaluation with amber! 90 This tool can be used to add the TaxID column to a biobox file.
99
100 **IMPORTAND NOTE WHEN USING GTDB OUTPUT**
101
102 Befor using GTDB-Tk output you have to use 2 other tools to map the GTDB names to the NCBI names and then map the NCBI names to there taxids.
103 To map the GTDB names to the NCBI names you extract the GTDB names from the summary file and with them you can now use the gtdb_to_taxdum tool!
104 After using the gtdb_to_taxdum tool you have the extract the NCBI names here and then use the tool Taxonkit[name2taxid] to finaly get the taxids.
105 91
106 **INPUTS** 92 **INPUTS**
107 - The CAMI amber biobox file 93
108 - BAT[bin2classifier] file(s) OR GTDB-Tk[Summary] file(s) 94 - A biobox file. This file can be generated with the CAMI AMBER utility Tool named 'convert to biobox'
109 - The gtdb_to_taxdum output[WHEN USING GTDB-Tk] 95 - Either a Contig2TaxID file which is generated from Kraken2 (the classification file from Kraken2) or a BinID2TaxID file which has to be created manually
110 - The Taxonkit[name2taxid] output[WHEN USING GTDB-Tk] 96
97 Example for a BinID2TaxID file
98
99 ::
100
101 #BinID TaxID
102 test1 11056
103 test2 444944
104 ABC 888
111 105
112 **OUTPUT** 106 **OUTPUT**
113 107
114 A CAMI amber biobox file with a taxid colum 108 - A CAMI AMBER biobox file with a taxid column
109
115 ]]> 110 ]]>
116 </help> 111 </help>
117 <citations> 112 <citations>
118 <citation type="bibtex">@misc{BibEntry2024Aug, 113 <citation type="bibtex">@misc{BibEntry2024Aug,
119 title = {{biobox{$\_$}add{$\_$}taxid}}, 114 title = {{biobox{$\_$}add{$\_$}taxid}},
120 author = {Santino Faack (SantaMcCloud)}, 115 author = {Santino Faack (SantaMcCloud)},
121 journal = {GitHub}, 116 journal = {GitHub},
122 year = {2024}, 117 year = {2024},
123 month = aug, 118 month = aug,
124 note = {[Online; accessed 22. Aug. 2024]}, 119 url = {https://github.com/SantaMcCloud/biobox_add_taxid/tree/release-1.0}
125 url = {https://github.com/SantaMcCloud/biobox_add_taxid/tree/release-0.3}
126 }</citation> 120 }</citation>
127 </citations> 121 </citations>
128 </tool> 122 </tool>