<!--
view kraken_biom.xml @ 2:b582a159bb10 draft default tip

planemo upload for repository https://github.com/smdabdoub/kraken-biom commit 79f3b3dd54ec6d401563f965ba93b7fca4cadda6
author iuc
date Mon, 11 Mar 2024 16:01:58 +0000
parents 65eb9962d272
children
line wrap: on
line source
-->

<tool id="kraken_biom" name="Kraken-biom" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of what the tool does. -->
    <description>Create BIOM-format tables from kraken output</description>
    <!-- Shared macros. Presumably macros.xml also defines the @TOOL_VERSION@,
         @VERSION_SUFFIX@ and @PROFILE@ tokens used on the tool element (file not shown here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Software requirements are supplied by the "requirements" macro. -->
    <expand macro="requirements"/>
    <!-- Command used to report the installed kraken-biom version. -->
    <version_command>kraken-biom --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Stage every supplied report under an index-based name and remember the
## link names so the same list can be handed to kraken-biom afterwards.
#set $report_links = []
#for $idx, $report in enumerate($kraken_reports)
    #if $report
        #set $link_name = '%s-kraken_report.tabular' % $idx
        ln -s '$report' './$link_name' &&
        #silent $report_links.append($link_name)
    #end if
#end for
## The optional metadata table is staged under a fixed name.
#if $metadata
    ln -s '$metadata' './metadata.tsv' &&
#end if
kraken-biom
#for $link_name in $report_links
    '$link_name'
#end for
## Optional arguments are only emitted when a value was provided.
#if $max
    --max $max
#end if
#if $min
    --min $min
#end if
#if $metadata
    --metadata 'metadata.tsv'
#end if
## The OTU ID list is written to the fixed file name OTU_FP in the working directory.
#if $otu_fp
    --otu_fp 'OTU_FP'
#end if
#if $fmt
    --fmt $fmt
#end if
    ]]></command>
    <inputs>
        <!-- Kraken report file(s) to convert; one or more datasets may be selected. -->
        <param name="kraken_reports"
               type="data"
               multiple="true"
               format="tabular"
               label="Input files to Kraken-biom: Kraken report output file(s)"/>

        <!-- Highest rank at which assigned reads are recorded (O is preselected). -->
        <param argument="--max"
               type="select"
               optional="true"
               label="Max"
               help="Assigned reads will be recorded only if they are at or below max rank. Default: O">
            <option value="O" selected="true">O</option>
            <option value="D">D</option>
            <option value="P">P</option>
            <option value="C">C</option>
            <option value="F">F</option>
            <option value="G">G</option>
            <option value="S">S</option>
            <option value="SS">SS</option>
        </param>

        <!-- Rank to which reads assigned at or below it are collapsed (S is preselected). -->
        <param argument="--min"
               type="select"
               optional="true"
               label="Min"
               help="Reads assigned at and below min rank will be recorded as being assigned to the min rank level. Default: S">
            <option value="O">O</option>
            <option value="D">D</option>
            <option value="P">P</option>
            <option value="C">C</option>
            <option value="F">F</option>
            <option value="G">G</option>
            <option value="S" selected="true">S</option>
            <option value="SS">SS</option>
        </param>

        <!-- Optional sample metadata table. The Sample IDs in its first column must match
             the staged input names described in the help text (0-kraken_report, 1-kraken_report, ...). -->
        <param argument="--metadata"
               type="data"
               format="tabular"
               optional="true"
               label="Sample metadata file"
               help="This should be in TSV or CSV (Tabular) format. The first column should be the Sample ID (e.g. 0-kraken_report, 1-kraken_report, etc.). This is the same name as the input files. If no metadata is given, basic metadata is added to support importing the BIOM table into sites like phinch (http://phinch.org/index.html). Example for metadata files: http://qiime.org/documentation/file_formats.html#mapping-file-overview"/>

        <!-- Switch that requests the additional OTU ID list output. -->
        <param name="otu_fp"
               type="boolean"
               truevalue="TRUE"
               falsevalue="FALSE"
               checked="false"
               label="Do you want to create an OTU IDs file"
               help="Create a file containing just the (NCBI) OTU IDs for use with a service such as phyloT (http://phylot.biobyte.de/) to generate a phylogenetic tree for use in downstream analysis such as UniFrac, iTol (itol.embl.de), or PhyloToAST (phylotoast.org)"/>

        <!-- Output serialisation of the BIOM table (HDF5 is preselected). -->
        <param argument="--fmt"
               type="select"
               optional="true"
               label="Output Format"
               help="Set the output format of the BIOM table. Default is HDF5">
            <option value="tsv">TSV</option>
            <option value="json">JSON</option>
            <option value="hdf5" selected="true">Biom2 (HDF5)</option>
        </param>
    </inputs>
    <outputs>
        <!-- Main table, collected from table.biom in the working directory. The datatype
             starts as tabular (matching fmt=tsv) and is switched to biom1 for JSON or
             biom2 for HDF5. Both cases live in one change_format element.
             NOTE(review): fmt is an optional select; if it is left unselected the datatype
             stays tabular. Confirm that matches what kraken-biom writes in that case. -->
        <data name="biomOutput" format="tabular" from_work_dir="./table.biom" label="Kraken-biom output file">
            <change_format>
                <when input="fmt" value="json" format="biom1"/>
                <when input="fmt" value="hdf5" format="biom2"/>
            </change_format>
        </data>
        <!-- OTU ID list, collected from OTU_FP; only present when the otu_fp switch is on. -->
        <data name="otuOutput" format="mothur.map" from_work_dir="./OTU_FP" label="OTU File">
            <filter>otu_fp is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- HDF5 output: datatype must be biom2. -->
        <test expect_num_outputs="1">
            <param name="kraken_reports" value="MG_AIR20200707.tabular,MG_AIR20200708.tabular,MG_AIR20200709.tabular"/>
            <param name="fmt" value="hdf5"/>
            <output name="biomOutput" ftype="biom2">
                <assert_contents>
                    <has_text text="creation-date"/>
                </assert_contents>
            </output>
        </test>

        <!-- TSV output: datatype stays tabular. -->
        <test expect_num_outputs="1">
            <param name="kraken_reports" value="MG_AIR20200707.tabular,MG_AIR20200708.tabular,MG_AIR20200709.tabular"/>
            <param name="fmt" value="tsv"/>
            <output name="biomOutput" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="8246"/>
                    <has_text text="Constructed from biom file"/>
                </assert_contents>
            </output>
        </test>

        <!-- JSON output: datatype must be biom1. -->
        <test expect_num_outputs="1">
            <param name="kraken_reports" value="MG_AIR20200707.tabular,MG_AIR20200708.tabular,MG_AIR20200709.tabular"/>
            <param name="fmt" value="json"/>
            <output name="biomOutput" ftype="biom1">
                <assert_contents>
                    <has_text text="Biological Observation"/>
                </assert_contents>
            </output>
        </test>

        <!-- JSON output plus the OTU ID list. -->
        <test expect_num_outputs="2">
            <param name="kraken_reports" value="MG_AIR20200707.tabular,MG_AIR20200708.tabular,MG_AIR20200709.tabular"/>
            <param name="otu_fp" value="true"/>
            <param name="fmt" value="json"/>
            <output name="biomOutput" ftype="biom1">
                <assert_contents>
                    <has_text text="Biological Observation"/>
                </assert_contents>
            </output>
            <output name="otuOutput" ftype="mothur.map">
                <assert_contents>
                    <has_n_lines n="8244"/>
                </assert_contents>
            </output>
        </test>

        <!-- JSON output plus the OTU ID list, with a sample metadata table supplied. -->
        <test expect_num_outputs="2">
            <param name="kraken_reports" value="MG_AIR20200707.tabular,MG_AIR20200708.tabular,MG_AIR20200709.tabular"/>
            <param name="metadata" value="SamplesMetaDataTabular.csv"/>
            <param name="otu_fp" value="true"/>
            <param name="fmt" value="json"/>
            <output name="biomOutput" ftype="biom1">
                <assert_contents>
                    <has_text text="Biological Observation"/>
                </assert_contents>
            </output>
            <output name="otuOutput" ftype="mothur.map">
                <assert_contents>
                    <has_n_lines n="8244"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Kraken-biom
===========
Create BIOM-format tables (http://biom-format.org) from Kraken output (http://ccb.jhu.edu/software/kraken/).

Input
=====
The program takes as input one or more files produced by the kraken-report tool. Each file is parsed and the counts for each OTU (operational taxonomic unit) are recorded, along with database ID (e.g. NCBI), and lineage. The extracted data are then stored in a BIOM table where each count is linked to the Sample and OTU it belongs to. Sample IDs are extracted from the input filenames (everything up to the '.'); in Galaxy the inputs are staged as 0-kraken_report.tabular, 1-kraken_report.tabular, etc., so the Sample IDs are 0-kraken_report, 1-kraken_report, and so on.

OTUs are defined by the --max and --min arguments. By default these are set to Order and Species respectively. This means that counts assigned directly to an Order, Family, or Genus are recorded under the associated OTU ID, and counts assigned at or below the Species level are assigned to the OTU ID for the species. Setting a minimum rank below Species is not yet available.

Output
======
The BIOM format currently has two major versions. Version 1.0 uses the JSON (JavaScript Object Notation) format as a base. Version 2.x uses the HDF5 (Hierarchical Data Format v5) as a base. The output format can be specified with the --fmt option. Note that a tab-separated (tsv) output format is also available. The resulting file will not contain most of the metadata, but can be opened by spreadsheet programs.

Version 2 of the BIOM format is used by default for output, but requires the Python library 'h5py'. If the library is not installed, kraken-biom will automatically switch to using version 1.0. Note that the output can optionally be compressed with gzip (--gzip) for version 1.0 and TSV files. Version 2 files are automatically compressed.
    ]]></help> 
    <citations>
        <!-- Software citation for the kraken-biom GitHub repository; the BibTeX key names the cited tool. -->
        <citation type="bibtex">
@misc{githubkrakenbiom,
  author = {Dabdoub, SM},
  year = {2016},
  title = {kraken-biom},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/smdabdoub/kraken-biom},
}</citation>
    </citations>
 </tool>