view comebin.xml @ 3:ddfb9c10740a draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/comebin/ commit f33c09bd4b736c401cdc037e328c7be4a24d8cf5
author iuc
date Thu, 21 Aug 2025 08:48:35 +0000
parents 15f6ef1d7c96
children
line wrap: on
line source

<tool id="comebin" name="COMEBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Binning of metagenomic contigs using COntrastive Multi-viEw representation learning</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
        <![CDATA[

            ## Stage the assembly and the BAM files under shell-safe names, then run
            ## COMEBin on the staged files and let it write into ./outputs.
            #import re

            mkdir 'outputs' 'bam_files' &&

            ## Any character that is not a word character, '-', '_' or '.' becomes '_'.
            #set $safename_fasta = re.sub('[^\w\-_\.]', '_', $assembly_file.element_identifier)
            ln -s '$assembly_file' '${safename_fasta}.fasta' &&

            ## Two different identifiers can sanitise to the same string (or two datasets
            ## can share a name). A second ln -s onto an existing path would fail and
            ## abort the whole command, so a numeric suffix is added on collision only;
            ## names that do not collide are left exactly as before.
            #set $used_names = []
            #for $file in $bam_files:
                #set $clean_name = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
                #set $safename = $clean_name
                #set $suffix = 1
                #while $safename in $used_names:
                    #set $safename = $clean_name + '_' + str($suffix)
                    #set $suffix = $suffix + 1
                #end while
                #silent $used_names.append($safename)
                ln -s '$file' './bam_files/${safename}.bam' &&
            #end for

            ## The numeric options are filled from the integer/float tool parameters,
            ## so they are emitted unquoted.
            run_comebin.sh
            -a '${safename_fasta}.fasta'
            -o 'outputs'
            -p 'bam_files'
            -t \${GALAXY_SLOTS:-12}
            -l ${loss}
            -n ${learning}
            -e ${emb_comebin}
            -c ${emb_cov}
            -b ${batch}

        ]]>
    </command>
    <inputs>
        <!-- Data inputs: one assembly and one or more BAM files. -->
        <param name="assembly_file" type="data" format="fasta" label="Metagenomic assembly file"/>
        <param name="bam_files" type="data" format="bam" multiple="true" label="Input bam file(s)"/>
        <!-- Numeric options handed to run_comebin.sh. The lower bounds make the form
             reject zero and negative values instead of letting the job fail at run time. -->
        <param name="learning" type="integer" min="1" value="6" label="Views for contrastive multiple-view learning"/>
        <param name="loss" type="float" value="0.15" label="Temperature in loss function" help="0.07 is recommended for assemblies with an N50 > 10000">
            <!-- NOTE(review): assumes the temperature is used as a divisor in the loss and must be strictly positive; confirm against COMEBin. -->
            <validator type="in_range" min="0" exclude_min="true" message="The temperature must be greater than 0"/>
        </param>
        <param name="emb_comebin" type="integer" min="1" value="2048" label="Embedding size for COMEBin network"/>
        <param name="emb_cov" type="integer" min="1" value="2048" label="Embedding size for coverage network"/>
        <param name="batch" type="integer" min="1" value="1024" label="Batch size"/>
    </inputs>
    <outputs>
        <!-- Files discovered in outputs/comebin_res/comebin_res_bins/ after the job
             become the elements of the "bins" list; each element is named after its file. -->
        <collection label="${tool.name}: BINS" name="bins" type="list" format="fasta">
            <discover_datasets directory="outputs/comebin_res/comebin_res_bins/" pattern="__name__"/>
        </collection>
    </outputs>
    <tests>
        <!-- One assembly with one BAM file; every numeric option is set to its default value. -->
        <test expect_num_outputs="1">
            <param name="assembly_file" ftype="fasta" value="input_single.fasta"/>
            <param name="bam_files" ftype="bam" value="input_single.bam"/>
            <param name="learning" value="6"/>
            <param name="loss" value="0.15"/>
            <param name="emb_comebin" value="2048"/>
            <param name="emb_cov" value="2048"/>
            <param name="batch" value="1024"/>
            <!-- The number and names of the bins are not pinned here; only the collection type is checked. -->
            <output_collection name="bins" type="list"/>
            <!-- Check that the parameter values reach the generated command line. -->
            <assert_command>
                <has_text text="run_comebin.sh"/>
                <has_text text="-l 0.15"/>
                <has_text text="-n 6"/>
                <has_text text="-e 2048"/>
                <has_text text="-c 2048"/>
                <has_text text="-b 1024"/>
            </assert_command>
        </test>
    </tests>
    <help>
        <![CDATA[

            **IMPORTANT**

            This tool may fail with an error. If the error is: ValueError: max() arg is an empty sequence
            then try running the tool again with different parameter values; this might fix it. If the error is: UnboundLocalError: local variable 'logits' referenced before assignment
            then the input data might not be sufficient for the tool to work correctly.

            Also, the run time of this tool will be long, since it is a machine learning tool for binning!

            **Input**

            - The assembly file in fasta format
            - The corresponding BAM file(s) for the assembly file

            **Output**

            The output of the tool is a collection of bins created with machine learning

            
        ]]>
    </help>
    <expand macro="citations"/>
</tool>