Mercurial > repos > iuc > das_tool
view das_tool.xml @ 5:b048a987dd7d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/das_tool commit b50985039ff11e1779f9f48f13e3b74fa1c0e955
author | iuc |
---|---|
date | Mon, 29 Apr 2024 20:17:04 +0000 |
parents | 7d997332582e |
children |
line wrap: on
line source
<tool id="das_tool" name="DAS Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>for genome-resolved metagenomics</description>
    <!-- The tokens (@TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@, @HELP_HEADER@) and the
         macros expanded below are expected to come from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Build the comma-separated bin table and label lists, one entry per "binning" repeat.
#set $bins = []
#set $labels = []
#for $s in $binning
    #silent $bins.append(str($s.bins))
    #if str($s.labels) != ''
        #silent $labels.append(str($s.labels))
    #else
        ## No label given: fall back to the dataset identifier, replacing every
        ## character outside [A-Za-z0-9_.-] by an underscore.
        #silent $labels.append(re.sub('[^\w\-_\.]', '_', $s.bins.element_identifier))
    #end if
#end for

## Link the optional user-supplied proteins into the working directory under a fixed name.
#if $adv.proteins
    ln -s '$adv.proteins' 'proteins' &&
#end if

#set $bins = ','.join($bins)
#set $labels = ','.join($labels)

DAS_Tool
    --contigs '$contigs'
    --outputbasename 'outputs'
    --bins '$bins'
    --labels '$labels'
    --search_engine '$adv.search_engine'
    #if $adv.proteins
        --proteins 'proteins'
    #end if
    --score_threshold $adv.score_threshold
    --duplicate_penalty $adv.duplicate_penalty
    --megabin_penalty $adv.megabin_penalty
    --max_iter_post_threshold $adv.max_iter_post_threshold
    $output.write_bin_evals
    $output.write_bins.write_bins
    ## The unbinned flag is only passed when bins are exported at all.
    #if str($output.write_bins.write_bins) != ''
        $output.write_bins.write_unbinned
    #end if
    $output.debug
    --threads \${GALAXY_SLOTS:-1}
    ]]></command>
    <inputs>
        <param argument="--contigs" type="data" format="fasta" label="Contig sequences"/>
        <!-- One repeat entry per binning prediction; at least one is required. -->
        <repeat name="binning" title="Bins" min="1">
            <param argument="--bins" type="data" format="tabular" label="Contigs-to-bin table" help="Tabular with two columns: contig-IDs and bin-IDs. Fasta_to_Contigs2Bin can be used to convert genome bins in fasta format to a Contigs-to-bin table"/>
            <param argument="--labels" type="text" value="" label="Name of binning prediction" help="If left empty the identifier of the contig-to-bin table is used. Only alphanumeric characters, dash, underscore and dot are allowed. Other characters are replaced by underscore.">
                <sanitizer invalid_char="_">
                    <valid initial="string.ascii_letters,string.digits">
                        <add value="-" />
                        <add value="_" />
                        <add value="." />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
        <section name="adv" title="Advanced options">
            <param argument="--search_engine" type="select" label="Engine used for single copy gene identification">
                <option value="diamond" selected="true">diamond</option>
                <option value="blastp">blastp</option>
            </param>
            <param argument="--proteins" type="data" format="fasta" optional="true" label="Predicted proteins" help="The file should be prodigal fasta format: &gt;contigID_geneNo"/>
            <param argument="--score_threshold" type="float" min="0" max="1" value="0.5" label="Score threshold until selection algorithm will keep selecting bins"/>
            <param argument="--duplicate_penalty" type="float" min="0" max="3" value="0.6" label="Penalty for duplicate single copy genes per bin (weight b)" help="Only change if you know what you are doing"/>
            <param argument="--megabin_penalty" type="float" min="0" max="3" value="0.5" label="Penalty for megabins (weight c)" help="Only change if you know what you are doing"/>
            <param argument="--max_iter_post_threshold" type="integer" min="1" value="10" label="Maximum number of iterations after reaching score threshold"/>
        </section>
        <section name="output" title="Outputs">
            <!-- "proteins" is not a command-line flag; it only toggles the proteins output dataset. -->
            <param name="proteins" type="boolean" checked="false" label="Output predicted proteins?"/>
            <param argument="--write_bin_evals" type="boolean" truevalue="--write_bin_evals" falsevalue="" checked="false" label="Write evaluation of input bin sets?"/>
            <conditional name="write_bins">
                <param argument="--write_bins" type="select" label="Export bins as fasta files?">
                    <option value="--write_bins" selected="true">Yes</option>
                    <option value="">No</option>
                </param>
                <when value="--write_bins">
                    <param argument="--write_unbinned" type="boolean" truevalue="--write_unbinned" falsevalue="" checked="false" label="Export unbinned contigs as fasta file?"/>
                </when>
                <when value=""/>
            </conditional>
            <param argument="--debug" type="boolean" truevalue="--debug" falsevalue="" checked="false" label="Write debug information to log file?"/>
        </section>
    </inputs>
    <outputs>
        <!-- All outputs are collected from files prefixed with the fixed output basename "outputs". -->
        <data name="summary" format="tabular" from_work_dir="outputs_DASTool_summary.tsv" label="${tool.name} on ${on_string}: Summary of output bins" />
        <data name="contigs2bin" format="tabular" from_work_dir="outputs_DASTool_contig2bin.tsv" label="${tool.name} on ${on_string}: Contigs to bin file for the output bins" />
        <data name="log" format="txt" from_work_dir="outputs_DASTool.log" label="${tool.name} on ${on_string}: Log" />
        <data name="eval" format="tabular" from_work_dir="outputs_allBins.eval" label="${tool.name} on ${on_string}: Quality and completeness estimates of input bin sets" >
            <filter>output['write_bin_evals']</filter>
        </data>
        <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bins">
            <filter>output['write_bins']['write_bins'] != ""</filter>
            <!-- The negative lookahead keeps unbinned.fa out of the collection; it is a separate output. -->
            <discover_datasets pattern="^(?!unbinned\.fa)((?P&lt;designation&gt;.*)\.fa)" format="fasta" directory="outputs_DASTool_bins"/>
        </collection>
        <data name="unbinned_contigs" format="fasta" from_work_dir="outputs_DASTool_bins/unbinned.fa" label="${tool.name} on ${on_string}: Unbinned contigs">
            <filter>output['write_bins']['write_bins'] != "" and output['write_bins']['write_unbinned']</filter>
        </data>
        <data name="proteins" format="fasta" from_work_dir="outputs_proteins.faa" label="${tool.name} on ${on_string}: Proteins" >
            <filter>output['proteins']</filter>
        </data>
    </outputs>
    <tests>
        <!-- user-supplied proteins, explicit label, bin evaluation enabled, no bin export -->
        <test expect_num_outputs="4">
            <param name="contigs" value="contigs.fasta"/>
            <repeat name="binning">
                <param name="bins" value="metabat.tabular"/>
                <param name="labels" value="metabat"/>
            </repeat>
            <section name="adv">
                <param name="search_engine" value="diamond"/>
                <param name="proteins" value="proteins.fasta"/>
                <param name="score_threshold" value="0.5"/>
                <param name="duplicate_penalty" value="0.6"/>
                <param name="megabin_penalty" value="0.5" />
            </section>
            <section name="output">
                <param name="write_bin_evals" value="true"/>
                <conditional name="write_bins">
                    <param name="write_bins" value=""/>
                </conditional>
                <param name="debug" value="true"/>
            </section>
            <output name="summary" ftype="tabular">
                <assert_contents>
                    <has_text text="unique_SCGs"/>
                    <has_text text="metabat.8"/>
                    <has_text text="bacteria"/>
                </assert_contents>
            </output>
            <output name="contigs2bin" ftype="tabular">
                <assert_contents>
                    <has_text text="Ley3_66761_scaffold_6"/>
                </assert_contents>
            </output>
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Skipping gene prediction"/>
                    <has_text text="#Target sequences to report alignments for: 1"/>
                </assert_contents>
            </output>
            <output name="eval" ftype="tabular">
                <assert_contents>
                    <has_text text="unique_SCGs"/>
                    <has_text text="metabat.8"/>
                </assert_contents>
            </output>
        </test>
        <!-- like the first test, but with empty label -->
        <test expect_num_outputs="4">
            <param name="contigs" value="contigs.fasta"/>
            <repeat name="binning">
                <param name="bins" value="metabat.tabular"/>
                <!-- <param name="labels" value="metabat"/> -->
            </repeat>
            <section name="adv">
                <param name="search_engine" value="diamond"/>
                <param name="proteins" value="proteins.fasta"/>
                <param name="score_threshold" value="0.5"/>
                <param name="duplicate_penalty" value="0.6"/>
                <param name="megabin_penalty" value="0.5" />
            </section>
            <section name="output">
                <param name="write_bin_evals" value="true"/>
                <conditional name="write_bins">
                    <param name="write_bins" value=""/>
                </conditional>
                <param name="debug" value="true"/>
            </section>
            <output name="summary" ftype="tabular">
                <assert_contents>
                    <has_text text="unique_SCGs"/>
                    <has_text text="metabat.8"/>
                    <has_text text="bacteria"/>
                </assert_contents>
            </output>
            <output name="contigs2bin" ftype="tabular">
                <assert_contents>
                    <has_text text="Ley3_66761_scaffold_6"/>
                </assert_contents>
            </output>
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Skipping gene prediction"/>
                    <has_text text="#Target sequences to report alignments for: 1"/>
                </assert_contents>
            </output>
            <output name="eval" ftype="tabular">
                <assert_contents>
                    <has_text text="unique_SCGs"/>
                    <has_text text="metabat.8"/>
                </assert_contents>
            </output>
        </test>
        <!-- no proteins supplied (genes are predicted); bins, unbinned contigs and proteins exported -->
        <test expect_num_outputs="6">
            <param name="contigs" value="contigs.fasta"/>
            <repeat name="binning">
                <param name="bins" value="metabat.tabular"/>
                <param name="labels" value="metabat"/>
            </repeat>
            <section name="adv">
                <param name="search_engine" value="diamond"/>
                <param name="score_threshold" value="0.5"/>
                <param name="duplicate_penalty" value="0.6"/>
                <param name="megabin_penalty" value="0.5" />
            </section>
            <section name="output">
                <param name="proteins" value="true"/>
                <param name="write_bin_evals" value="false"/>
                <conditional name="write_bins">
                    <param name="write_bins" value="--write_bins"/>
                    <param name="write_unbinned" value="true"/>
                </conditional>
                <param name="debug" value="true"/>
            </section>
            <output name="summary" ftype="tabular">
                <assert_contents>
                    <has_text text="unique_SCGs"/>
                    <has_text text="metabat.8"/>
                    <has_text text="bacteria"/>
                </assert_contents>
            </output>
            <output name="contigs2bin" ftype="tabular">
                <assert_contents>
                    <has_text text="Ley3_66761_scaffold_6"/>
                </assert_contents>
            </output>
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Parameters"/>
                    <has_text text="Predicting genes"/>
                </assert_contents>
            </output>
            <output_collection name="bins" count="1">
                <!--unbinned.fa must not appear in bins collection. unbinned.fa appears as additional output below.-->
                <element name="metabat.8" ftype="fasta">
                    <assert_contents>
                        <has_text text="&gt;Ley3_66761_scaffold_6"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="unbinned_contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;Ley3_66761_scaffold_505"/>
                </assert_contents>
            </output>
            <output name="proteins" ftype="fasta">
                <assert_contents>
                    <has_text text="Ley3_66761_scaffold_6_1 # 1 # 786 # 1 #"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Inputs
======

- Bins: Tab-separated files of contig-IDs and bin-IDs. Contigs to bin file example:

::

    Contig_1    bin.01
    Contig_8    bin.01
    Contig_42   bin.02
    Contig_49   bin.03

- Contigs: Assembled contigs in fasta format:

::

    >Contig_1
    ATCATCGTCCGCATCGACGAATTCGGCGAACGAGTACCCCTGACCATCTCCGATTA...
    >Contig_2
    GATCGTCACGCAGGCTATCGGAGCCTCGACCCGCAAGCTCTGCGCCTTGGAGCAGG...

- [Optional] Proteins: Predicted proteins in prodigal fasta format. The header contains contig-ID and gene number:

::

    >Contig_1_1
    MPRKNKKLPRHLLVIRTSAMGDVAMLPHALRALKEAYPEVKVTVATKSLFHPFFEG...
    >Contig_1_2
    MANKIPRVPVREQDPKVRATNFEEVCYGYNVEEATLEASRCLNCKNPRCVAACPVN...

Outputs
=======

- Summary of output bins including quality and completeness estimates
- Contigs to bin file of output bins
- Log file
- [Optional] Quality and completeness estimates of input bin sets
- [Optional] Bins in fasta format
- [Optional] Unbinned contigs
- [Optional] Predicted proteins in fasta format
    ]]></help>
    <expand macro="citations"/>
</tool>