Mercurial > repos > iuc > semibin_train
view train.xml @ 6:3215ab8a94de draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/semibin commit 2c08a2e49a2844efe92340c5a9e9c8323e4a33d6
| author | iuc |
|---|---|
| date | Tue, 28 Oct 2025 08:20:57 +0000 |
| parents | 2344bc30a326 |
| children |
line wrap: on
line source
<tool id="semibin_train" name="SemiBin: Train" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper for "SemiBin2 train_semi".

        Two modes are offered through the "mode" conditional:
          * single  : one contig FASTA plus one data, data-split and cannot-link file
          * several : lists of the same four inputs, passed together with the
                      train-from-many flag

        Tokens (@TOOL_VERSION@, @PROFILE@, @HELP_...@) and the expanded macros
        (requirements, min_len, orf-finder, train_output, ...) come from macros.xml,
        which is not part of this file.
    -->
    <description>
        the semi-supervised deep learning model
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Link every contig file to a working-directory name that ends with its
## Galaxy datatype extension (fasta, fasta.gz or fasta.bz2).
#if $mode.select == 'single'
    ln -s '$mode.input_fasta' 'contigs.${mode.input_fasta.ext}' &&
#else
    #for $e in $mode.input_fasta
        ## Replace every character that is not whitespace, a word character or a dash
        #set $identifier = re.sub('[^\s\w\-]', '_', str($e.element_identifier))
        ln -s '$e' '${identifier}.$e.ext' &&
    #end for
#end if

SemiBin2
    train_semi
#if $mode.select == 'single'
        --input-fasta 'contigs.${mode.input_fasta.ext}'
        --data '$mode.data'
        --data-split '$mode.data_split'
        --cannot-link '$mode.cannot_link'
#else
        --train-from-many
        --input-fasta
    #for $e in $mode.input_fasta
        ## Must produce the same link name as the loop above
        #set $identifier = re.sub('[^\s\w\-]', '_', str($e.element_identifier))
            '${identifier}.$e.ext'
    #end for
        --data
    #for $e in $mode.data
            '$e'
    #end for
        --data-split
    #for $e in $mode.data_split
            '$e'
    #end for
        --cannot-link
    #for $e in $mode.cannot_link
            '$e'
    #end for
#end if
        --output 'output'
        --threads \${GALAXY_SLOTS:-1}
        --epoches $epoches
        --batch-size $batch_size
        --random-seed $random_seed
#if $min_len.method == 'min-len'
        --min-len $min_len.min_len
#else if $min_len.method == 'ratio'
        --ratio $min_len.ratio
#end if
        --orf-finder '$orf_finder'
    ]]></command>
    <inputs>
        <conditional name="mode">
            <!-- Wrapper-only selector: it is not a SemiBin2 flag, so it is declared with "name" rather than "argument" -->
            <param name="select" type="select" label="Mode to train the models">
                <option value="single" selected="true">From one sample</option>
                <option value="several">From multiple samples (train model across several samples can get better pre-trained model for single-sample binning)</option>
            </param>
            <when value="single">
                <param argument="--input-fasta" type="data" format="fasta,fasta.gz,fasta.bz2" label="Contig sequences"/>
                <param argument="--data" type="data" format="csv" label="Train data"/>
                <param argument="--data-split" type="data" format="csv" label="Split train data"/>
                <param argument="--cannot-link" type="data" format="txt" label="Cannot-link constraints"/>
            </when>
            <when value="several">
                <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz,fasta.bz2" label="Contig sequences"/>
                <param argument="--data" type="data" format="csv" multiple="true" label="Train data"/>
                <param argument="--data-split" type="data" format="csv" multiple="true" label="Split train data"/>
                <param argument="--cannot-link" type="data" format="txt" multiple="true" label="Cannot-link constraints"/>
            </when>
        </conditional>
        <expand macro="min_len"/>
        <expand macro="orf-finder"/>
        <expand macro="random-seed"/>
        <expand macro="epoches"/>
        <expand macro="batch-size"/>
    </inputs>
    <outputs>
        <expand macro="train_output"/>
    </outputs>
    <tests>
        <!-- Single sample, uncompressed FASTA -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="data" ftype="csv" value="data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="2500"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="1"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Single sample, bzip2-compressed FASTA -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta.bz2" value="input_single.fasta.bz2"/>
                <param name="data" ftype="csv" value="data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="2500"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="1"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Single sample, gzip-compressed FASTA -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta.gz" value="input_single.fasta.gz"/>
                <param name="data" ftype="csv" value="data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="2500"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="1"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Several samples, contig length threshold chosen through the ratio option -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="several"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta,input_single_2.fasta,input_single_3.fasta"/>
                <param name="data" ftype="csv" value="data.csv,data.csv,data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv,data_split.csv,data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt,cannot.txt,cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="20"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This tool trains the semi-supervised deep learning model

Inputs
======

@HELP_INPUT_FASTA@

@HELP_CANNOT@

@HELP_DATA@

Outputs
=======

@HELP_MODEL@
    ]]></help>
    <expand macro="citations"/>
</tool>
