Mercurial > repos > iuc > binning_refiner
diff binning_refiner.xml @ 0:f350d182f786 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binning_refiner/ commit 591fef692e8efeb65c5214e6512aeaaf66201b26"
author | iuc |
---|---|
date | Fri, 18 Feb 2022 13:12:43 +0000 |
parents | |
children | 1c8d0916f97f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/binning_refiner.xml Fri Feb 18 13:12:43 2022 +0000 @@ -0,0 +1,125 @@ +<tool id="bin_refiner" name="Binning refiner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>refines metagenome bins</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ +#import re + +## Binning refiner prepends the -p param value +## (which we set to be the string 'refined' in +## the command line) to a hard-coded string (i.e., +## '_Binning_refiner_outputs') to set the base +## output directory. + +## Binning_refiner requires 2 levels of directories +## for the input bins. +mkdir -p input_bin_dir/bins && +#for $f in $input_bins: + #set identifier = re.sub('[^\s\w\-]', '_', str($f.element_identifier)) + #set file_name = $identifier + '.' + $f.ext + ## Binning_refiner doesn't handle gzipped files. + #if $f.ext.endswith(".gz") + gunzip -c '${f}' > 'input_bin_dir/bins/${file_name}' && + #else: + ln -s '${f}' 'input_bin_dir/bins/${file_name}' && + #end if +#end for + +Binning_refiner +-i input_bin_dir +-p 'refined' +&& mv 'refined_Binning_refiner_outputs/refined_contigs.txt' '$output_refined_contigs' +&& mv 'refined_Binning_refiner_outputs/refined_sources_and_length.txt' '$output_sources_and_length' + ]]></command> + <inputs> + <param name="input_bins" format="fasta,fasta.gz" type="data" multiple="true" label="Binned fasta files"/> + <param argument="-m" type="integer" value="512" label="Minimum size (Kbp) of refined bin" help="Bins smaller than this will be eliminated"/> + </inputs> + <outputs> + <collection name="output_refined_bins" type="list" label="${tool.name} on ${on_string}: (refined bins)"> + <discover_datasets pattern="(?P<designation>.*)\.fasta" format="fasta" directory="refined_Binning_refiner_outputs/refined_refined_bins"/> + </collection> + <data name="output_refined_contigs" format="tabular" label="${tool.name} on ${on_string} (refined contigs)"/> + <data name="output_sources_and_length" format="tabular" label="${tool.name} on ${on_string} (sources and length)"/> + </outputs> + <tests> + <test expect_num_outputs="3"> + <param name="input_bins" value="MetaBAT_17.fa.gz,MetaBAT_18.fa.gz,MetaBAT_19.fa.gz,MetaBAT_20.fa.gz,MetaBAT_21.fa.gz,MetaBAT_22.fa.gz,MetaBAT_23.fa.gz,Concoct_1.fa.gz,Concoct_3.fa.gz,Concoct_8.fa.gz" ftype="fasta.gz"/> + <output_collection name="output_refined_bins" type="list" count="7"> + <element name="refined_1" ftype="fasta"> + <assert_contents> + <has_size value="1320640"/> + <has_text text=">scaffold_2064"/> + <has_n_lines n="21765"/> + </assert_contents> + </element> + <element name="refined_2" ftype="fasta"> + <assert_contents> + <has_size value="941488"/> + <has_text text=">scaffold_1301"/> + <has_n_lines n="15461"/> + </assert_contents> + </element> + <element name="refined_3" ftype="fasta"> + <assert_contents> + <has_size value="883642"/> + <has_text text=">scaffold_262"/> + <has_n_lines n="14495"/> + </assert_contents> + </element> + <element name="refined_4" ftype="fasta"> + <assert_contents> + <has_size value="758509"/> + <has_text text=">scaffold_923"/> + <has_n_lines n="12484"/> + </assert_contents> + </element> + <element name="refined_5" ftype="fasta"> + <assert_contents> + <has_size value="722197"/> + <has_text text=">scaffold_232"/> + <has_n_lines n="11849"/> + </assert_contents> + </element> + <element name="refined_6" ftype="fasta"> + <assert_contents> + <has_size value="637342"/> + <has_text text=">scaffold_259"/> + <has_n_lines n="10460"/> + </assert_contents> + </element> + <element name="refined_7" ftype="fasta"> + <assert_contents> + <has_size value="560996"/> + <has_text text=">scaffold_1510"/> + <has_n_lines n="9219"/> + </assert_contents> + </element> + </output_collection> + <output name="output_refined_contigs" file="output_refined_contigs.tabular" ftype="tabular"/> + <output name="output_sources_and_length" file="output_sources_and_length.tabular" ftype="tabular"/> + </test> + </tests> + <help><![CDATA[ +**What it does** + +Reconciles the outputs of different binning programs with the aim to improve the quality of genome bins, +especially with respect to contamination levels. + +The tool accepts one or more fasta datasets (i.e., bins) that were produced by metagenome binning tools +(CONCOCT MaxBin2, MetaBAT2 and others). + +All refined bins larger than the specified "Minimum size (Kbp) of refined bin" will be output as a dataset +collection of fasta files. Additional outputs include a tabular dataset containing the id of the contigs +in each refined bin (refined contigs) and another tabular dataset containing the size of each refined bin +and the origin of its contigs (sources and length). + +**More information** + +https://github.com/songweizhi/Binning_refiner + + ]]></help> + <expand macro="citations"/> +</tool>