cat_bins: macros.xml comparison

comparison macros.xml @ 0:0094893f5001 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"

author	iuc
date	Tue, 10 Dec 2019 16:06:38 -0500
parents
children	18676df0cb3a

comparison

equal deleted inserted replaced

--1:000000000000
+:0094893f5001
+<macros>
+<!-- Version string; the requirements macro uses it as the version of the cat package. -->
+<token name="@VERSION@">5.0.3</token>
+<!-- Requires the cat package at @VERSION@; the yield lets a caller append further requirement elements. -->
+<xml name="requirements">
+    <requirements>
+        <requirement type="package" version="@VERSION@">cat</requirement>
+        <yield/>
+    </requirements>
+</xml>
+<!-- Version probe: invokes CAT with its version flag. -->
+<xml name="version_command">
+    <version_command>CAT --version</version_command>
+</xml>
+<!-- Sub-folder name that @CAT_DB@ joins onto a history dataset's extra_files_path for the database folder. -->
+<token name="@DATABASE_FOLDER@">CAT_database</token>
+<!-- Sub-folder name that @CAT_DB@ and @CAT_TAXONOMY@ join onto a history dataset's extra_files_path for the taxonomy folder. -->
+<token name="@TAXONOMY_FOLDER@">taxonomy</token>
+<!-- Database chooser: either an entry of the cat_database data table ("cached") or a dataset from the history. -->
+<xml name="cat_db">
+    <conditional name="db">
+        <param name="db_src"
+               type="select"
+               label="CAT database (--database_folder,--taxonomy_folder) from">
+            <option value="cached">local cached database</option>
+            <option value="history">history</option>
+        </param>
+        <when value="cached">
+            <param name="cat_builtin"
+                   type="select"
+                   label="Use a built-in CAT database"
+                   help="If the CAT database of interest is not listed, contact your Galaxy administrator">
+                <options from_data_table="cat_database">
+                    <filter type="sort_by" column="2"/>
+                    <validator type="no_options" message="No CAT database is available."/>
+                </options>
+            </param>
+        </when>
+        <when value="history">
+            <param name="cat_db"
+                   type="data"
+                   format="txt"
+                   label="A history dataset from CAT prepare tool"/>
+        </when>
+    </conditional>
+</xml>
+<!--
+  Cheetah fragment emitting the database_folder and taxonomy_folder options.
+  When db.db_src is 'cached' the paths come from the fields of the selected cat_builtin entry;
+  otherwise they are the @DATABASE_FOLDER@ and @TAXONOMY_FOLDER@ sub-folders of the
+  extra_files_path of the history dataset db.cat_db.
+-->
+<token name="@CAT_DB@"><![CDATA[
+#if $db.db_src == 'cached':
+--database_folder '$db.cat_builtin.fields.database_folder'
+--taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+#else
+#import os.path
+#set $catdb = $db.cat_db.extra_files_path
+--database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+--taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+#end if
+]]></token>
+<!--
+  Cheetah fragment emitting only the taxonomy_folder option, resolved the same two ways as in
+  @CAT_DB@: from the cat_builtin entry's fields when db.db_src is 'cached', otherwise from the
+  @TAXONOMY_FOLDER@ sub-folder of the history dataset's extra_files_path.
+-->
+<token name="@CAT_TAXONOMY@"><![CDATA[
+#if $db.db_src == 'cached':
+--taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+#else
+#import os.path
+#set $catdb = $db.cat_db.extra_files_path
+--taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+#end if
+]]></token>
+<!-- Test parameters for the db conditional: cached source with the CAT_prepare_test entry. -->
+<xml name="test_catdb">
+    <conditional name="db">
+        <param name="db_src" value="cached"/>
+        <param name="cat_builtin" value="CAT_prepare_test"/>
+    </conditional>
+</xml>
+<!-- Optional reuse of the protein fasta and diamond alignment from an earlier run; defaults to "no". -->
+<xml name="use_intermediates">
+    <conditional name="previous">
+        <param name="use_previous"
+               type="select"
+               label="Use previous prodigal gene prediction and diamond alignment">
+            <help>predicted_proteins.faa and alignment.diamond from previous CAT run.</help>
+            <option value="yes">Yes</option>
+            <option value="no" selected="true">No</option>
+        </param>
+        <when value="yes">
+            <param argument="--proteins_fasta"
+                   type="data"
+                   format="fasta"
+                   label="prodigal predicted proteins fasta"/>
+            <param argument="--diamond_alignment"
+                   type="data"
+                   format="tabular"
+                   label="alignment.diamond file"/>
+        </when>
+        <when value="no"/>
+    </conditional>
+</xml>
+<!--
+  Cheetah fragment: passes the proteins_fasta and diamond_alignment datasets when
+  previous.use_previous is 'yes', and always sets the output prefix to 'cat_output'
+  (the outputs macro collects files with that prefix from the working directory).
+-->
+<token name="@USE_INTERMEDIATES@"><![CDATA[
+#if $previous.use_previous == 'yes'
+--proteins_fasta '$previous.proteins_fasta'
+--diamond_alignment '$previous.diamond_alignment'
+#end if
+--out_prefix 'cat_output'
+]]></token>
+<!-- Classification tuning inputs: integer range (0 to 49, default 10) and float fraction (0 to 0.99, default 0.5). -->
+<xml name="custom_settings">
+    <param argument="--range"
+           type="integer"
+           value="10"
+           min="0"
+           max="49"
+           label="range"/>
+    <param argument="--fraction"
+           type="float"
+           value="0.5"
+           min="0"
+           max="0.99"
+           label="fraction"/>
+</xml>
+<!-- Command fragment passing the range and fraction inputs declared by the custom_settings macro. -->
+<token name="@CUSTOM_SETTINGS@"><![CDATA[
+--range '$range'
+--fraction '$fraction'
+]]></token>
+<!-- Advanced DIAMOND inputs, shown only when set_diamond_opts is "yes" (default "no"). -->
+<xml name="diamond_options">
+    <conditional name="diamond">
+        <param name="set_diamond_opts" type="select" label="Set advanced diamond options">
+            <option value="yes">Yes</option>
+            <option value="no" selected="true">No</option>
+        </param>
+        <when value="yes">
+            <param argument="--sensitive"
+                   type="boolean"
+                   truevalue="--sensitive"
+                   falsevalue=""
+                   checked="false"
+                   label="Run DIAMOND in sensitive mode (considerably slower)"/>
+            <param argument="--block_size"
+                   type="float"
+                   value="2.0"
+                   min="1"
+                   max="10"
+                   label="DIAMOND block-size parameter."
+                   help="lower will decrease memory and temporary disk space usage, higher will increase performance."/>
+            <param argument="--index_chunks"
+                   type="integer"
+                   value="4"
+                   min="1"
+                   max="10"
+                   label="DIAMOND index-chunks parameter"
+                   help="Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage."/>
+            <param argument="--top"
+                   type="integer"
+                   value="50"
+                   min="1"
+                   max="50"
+                   label="DIAMOND top parameter"
+                   help="Governs hits within range of best hit that are written to the alignment file. This implies you know what you are doing."/>
+        </when>
+        <when value="no"/>
+    </conditional>
+</xml>
+<!--
+  Cheetah fragment for the advanced DIAMOND inputs. Nothing is emitted unless
+  diamond.set_diamond_opts is 'yes'. The top option, together with the I_know_what_Im_doing
+  flag, is only passed when the chosen value is below 50 (the input's default and maximum).
+  The value is cast to int explicitly so the comparison does not depend on how the
+  parameter wrapper object orders itself against a number.
+-->
+<token name="@DIAMOND_OPTIONS@"><![CDATA[
+#if $diamond.set_diamond_opts == 'yes':
+$diamond.sensitive
+--block_size '$diamond.block_size'
+--index_chunks '$diamond.index_chunks'
+#if int(str($diamond.top)) < 50:
+--I_know_what_Im_doing
+--top '$diamond.top'
+#end if
+#end if
+]]></token>
+<!-- Two boolean switches for CAT add_names: only_official (checked by default) and exclude_scores (unchecked). -->
+<xml name="add_names_options">
+    <param argument="--only_official"
+           type="boolean"
+           truevalue="--only_official"
+           falsevalue=""
+           checked="true"
+           label="Only output official level names."/>
+    <param argument="--exclude_scores"
+           type="boolean"
+           truevalue="--exclude_scores"
+           falsevalue=""
+           checked="false"
+           label="Exclude bit-score support scores in the lineage."/>
+</xml>
+<!--
+  Command fragment emitting the only_official and exclude_scores switches as top-level inputs.
+  NOTE(review): @ADD_NAMES@ reads the same switches through the names conditional instead;
+  presumably this token serves a tool that expands add_names_options at top level. Confirm against callers.
+-->
+<token name="@ADD_NAMES_OPTIONS@"><![CDATA[
+$only_official $exclude_scores
+]]></token>
+<!-- Selects which result tables get taxonomic names; every choice except "no" exposes the add_names_options switches. -->
+<xml name="add_names">
+    <conditional name="names">
+        <param name="add_names"
+               type="select"
+               label="CAT add_names for"
+               help="annotate with taxonomic names.">
+            <option value="no">No</option>
+            <option value="orf2lca">ORF2LCA.names.txt</option>
+            <option value="classification">classification.names.txt</option>
+            <option value="both">ORF2LCA.names.txt and classification.names.txt</option>
+        </param>
+        <when value="no"/>
+        <when value="orf2lca">
+            <expand macro="add_names_options"/>
+        </when>
+        <when value="classification">
+            <expand macro="add_names_options"/>
+        </when>
+        <when value="both">
+            <expand macro="add_names_options"/>
+        </when>
+    </conditional>
+</xml>
+<!-- Path of tabpad.py inside the tool directory. NOTE(review): the command tokens in this file spell this path out directly rather than referencing this token. -->
+<token name="@TXT2TSV@">${__tool_directory__}/tabpad.py</token>
+<!--
+  Cheetah fragment appending CAT add_names steps; each step begins with the shell AND operator,
+  so it is meant to follow an earlier command.
+  For 'classification' or 'both': names are added to the contig2classification table when bcat
+  is 'CAT' (otherwise to the bin2classification table), written to classification_names.txt,
+  and tabpad.py then writes the classification_names output.
+  For 'orf2lca' or 'both': the same is done for the ORF2LCA table and the orf2lca_names output.
+-->
+<token name="@ADD_NAMES@"><![CDATA[
+#if $names.add_names in ['classification','both']:
+&& CAT add_names $names.only_official $names.exclude_scores
+@CAT_TAXONOMY@
+#if $bcat == 'CAT'
+-i 'cat_output.contig2classification.tsv'
+#else
+-i 'cat_output.bin2classification.tsv'
+#end if
+-o 'classification_names.txt'
+&& ${__tool_directory__}/tabpad.py -i 'classification_names.txt' -o '$classification_names'
+#end if
+#if $names.add_names in ['orf2lca','both']:
+&& CAT add_names $names.only_official $names.exclude_scores
+@CAT_TAXONOMY@
+-i 'cat_output.ORF2LCA.tsv'
+-o 'orf2lca_names.txt'
+&& ${__tool_directory__}/tabpad.py -i 'orf2lca_names.txt' -o '$orf2lca_names'
+#end if
+]]></token>
+<!-- Switch for the optional CAT summarise report on the classification table. -->
+<xml name="summarise">
+    <param name="summarise"
+           type="select"
+           label="CAT summarise report"
+           help="Report the number of assignments to each taxonomic name">
+        <option value="no">No</option>
+        <option value="classification">classification.summary.txt</option>
+    </param>
+</xml>
+<!--
+  Cheetah fragment appending the CAT summarise step when summarise is 'classification'.
+  Input selection: if add_names already produced official-only classification names, that
+  output ($classification_names) is reused; otherwise an extra CAT add_names run with the
+  only_official switch writes the intermediate file classification_official_names, which is
+  used only inside this fragment.
+  The contigs fasta is passed to CAT summarise only when bcat is 'CAT'.
+  tabpad.py finally writes the classification_summary output.
+-->
+<token name="@SUMMARISE@"><![CDATA[
+#if $summarise in ['classification']:
+#if $names.add_names in ['classification','both'] and $names.only_official:
+#set $summary_input = $classification_names
+#else
+#set $summary_input = 'classification_official_names'
+&& CAT add_names --only_official
+@CAT_TAXONOMY@
+#if $bcat == 'CAT'
+-i 'cat_output.contig2classification.tsv'
+#else
+-i 'cat_output.bin2classification.tsv'
+#end if
+-o '$summary_input'
+#end if
+&& CAT summarise
+#if $bcat == 'CAT'
+-c '$contigs_fasta'
+#end if
+-i '$summary_input'
+-o 'classification_summary.txt'
+&& ${__tool_directory__}/tabpad.py -i 'classification_summary.txt' -o '$classification_summary'
+#end if
+]]></token>
+<!-- Multi-select of result files to keep; the yield lets a caller append its own option elements. -->
+<xml name="select_outputs">
+    <param name="select_outputs"
+           type="select"
+           multiple="true"
+           optional="false"
+           label="Select outputs">
+        <option value="log" selected="true">log</option>
+        <option value="predicted_proteins_faa" selected="true">Prodigal predicted_proteins.faa</option>
+        <option value="predicted_proteins_gff">Prodigal predicted_proteins.gff</option>
+        <option value="alignment_diamond">Diamond blastp alignment.diamond</option>
+        <option value="orf2lca" selected="true">ORF2LCA.txt (taxonomic assignment per predicted ORF)</option>
+        <yield/>
+    </param>
+</xml>
+<!-- CAT (contig) variant: hidden values read by the outputs macro and command tokens, plus the contig2classification output choice. -->
+<xml name="select_cat_outputs">
+    <param name="bcat" type="hidden" value="CAT"/>
+    <param name="seqtype" type="hidden" value="contig"/>
+    <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions"/>
+    <param name="bin_col" type="hidden" value=""/>
+    <expand macro="select_outputs">
+        <option value="contig2classification" selected="true">contig2classification.txt (taxonomic assignment per contig)</option>
+    </expand>
+</xml>
+<!-- BAT (bin) variant: hidden values read by the outputs macro and command tokens, plus the bin2classification output choice. -->
+<xml name="select_bat_outputs">
+    <param name="bcat" type="hidden" value="BAT"/>
+    <param name="seqtype" type="hidden" value="bin"/>
+    <param name="sum_titles" type="hidden" value="bins"/>
+    <param name="bin_col" type="hidden" value="bin,"/>
+    <expand macro="select_outputs">
+        <option value="bin2classification" selected="true">bin2classification.txt (taxonomic assignment per metagenome assembly)</option>
+    </expand>
+</xml>
+<!--
+  Output datasets. Files with the cat_output prefix are collected from the working directory;
+  the names and summary outputs have no from_work_dir and are written directly by the command.
+  Each dataset is kept only when its filter holds. The prodigal and diamond files are
+  additionally skipped when previous results are reused.
+-->
+<xml name="outputs">
+    <data name="log"
+          format="txt"
+          label="${bcat}.log"
+          from_work_dir="cat_output.log">
+        <filter>'log' in select_outputs or not select_outputs</filter>
+    </data>
+    <data name="predicted_proteins_faa"
+          format="fasta"
+          label="${bcat}.predicted_proteins.faa"
+          from_work_dir="cat_output.predicted_proteins.faa">
+        <filter>'predicted_proteins_faa' in select_outputs and previous['use_previous'] == 'no'</filter>
+    </data>
+    <data name="predicted_proteins_gff"
+          format="gff"
+          label="${bcat}.predicted_proteins.gff"
+          from_work_dir="cat_output.predicted_proteins.gff">
+        <filter>'predicted_proteins_gff' in select_outputs and previous['use_previous'] == 'no'</filter>
+    </data>
+    <data name="alignment_diamond"
+          format="tabular"
+          label="${bcat}.alignment.diamond"
+          from_work_dir="cat_output.alignment.diamond">
+        <filter>'alignment_diamond' in select_outputs and previous['use_previous'] == 'no'</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+        </actions>
+    </data>
+    <data name="orf2lca"
+          format="tabular"
+          label="${bcat}.ORF2LCA.txt"
+          from_work_dir="cat_output.ORF2LCA.tsv">
+        <filter>'orf2lca' in select_outputs</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score"/>
+        </actions>
+    </data>
+    <data name="contig2classification"
+          format="tabular"
+          label="${bcat}.contig2classification.txt"
+          from_work_dir="cat_output.contig2classification.tsv">
+        <filter>'contig2classification' in select_outputs</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores"/>
+        </actions>
+    </data>
+    <data name="bin2classification"
+          format="tabular"
+          label="${bcat}.bin2classification.txt"
+          from_work_dir="cat_output.bin2classification.tsv">
+        <filter>'bin2classification' in select_outputs</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores"/>
+        </actions>
+    </data>
+    <data name="orf2lca_names"
+          format="tabular"
+          label="${bcat}.ORF2LCA.names.txt">
+        <filter>names['add_names'] in ['both','orf2lca']</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species"/>
+        </actions>
+    </data>
+    <data name="classification_names"
+          format="tabular"
+          label="${bcat}.${seqtype}2classification.names.txt">
+        <filter>names['add_names'] in ['both','classification']</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="1"/>
+            <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species"/>
+        </actions>
+    </data>
+    <data name="classification_summary"
+          format="tabular"
+          label="${bcat}.${seqtype}2classification.summary.txt">
+        <filter>'classification' in summarise</filter>
+        <actions>
+            <action name="comment_lines" type="metadata" default="4"/>
+            <action name="column_names" type="metadata" default="rank,clade,number of ${sum_titles}"/>
+        </actions>
+    </data>
+</xml>
+<!-- Shared help text (reStructuredText) outlining the CAT/BAT workflow steps and where a CAT database comes from. -->
+<token name="@COMMON_HELP@"><![CDATA[
+The Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) workflows are described at: https://github.com/dutilh/CAT
+- CAT contigs/CAT bins - runs Prodigal_ prokaryotic protein prediction on the fasta input.
+- CAT contigs/CAT bins - runs Diamond_ to align predicted proteins to the reference proteins in the CAT database.
+- CAT contigs/CAT bins - assigns taxonomic classification to fasta entries and ORFs based on alignments.
+- CAT add_names - annotates outputs with taxonomic names.
+- CAT summarise - reports number of assignments to each taxonomic name.
+A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool.
+.. _Prodigal: https://github.com/hyattpd/Prodigal
+.. _Diamond: https://github.com/bbuchfink/diamond
+]]></token>
+<!-- Shared help text listing the files each workflow step can produce; the add_names and summarise entries are optional steps. -->
+<token name="@OUTPUTS_HELP@"><![CDATA[
+**OUTPUTS**
+Any of the files produced by the CAT workflow are available as outputs
+- Prodigal
+- predicted_proteins.faa
+- predicted_proteins.gff
+- Diamond
+- alignment.diamond
+- CAT contigs/bins
+- contigs/bin2classification.txt
+- ORF2LCA.txt
+- CAT add_names (optional)
+- contigs/bin2classification.names.txt
+- ORF2LCA.names.txt
+- CAT summarise (optional)
+- contigs/bin2classification.summary.txt
+]]></token>
+<!-- Shared help text describing the optional classification and DIAMOND arguments, with guidance on lowering the DIAMOND top parameter. -->
+<token name="@OPTIONS_HELP@"><![CDATA[
+Optional arguments:
+-r, --range               cut-off range after alignment [0-49] (default: 10).
+-f, --fraction            fraction of bit-score support for each classification
+[0-0.99] (default: 0.5).
+-p, --proteins_fasta
+Path to predicted proteins fasta file. If supplied,
+CAT will skip the protein prediction step.
+-a, --diamond_alignment
+Path to DIAMOND alignment table. If supplied, CAT will
+skip the DIAMOND alignment step and directly classify
+the sequences. A predicted proteins fasta file should
+also be supplied with argument [-p / --proteins_fasta].
+DIAMOND specific optional arguments:
+--sensitive     Run DIAMOND in sensitive mode (default: not enabled).
+--block_size    DIAMOND block-size parameter (default: 2.0). Lower
+numbers will decrease memory and temporary disk space
+usage.
+--index_chunks
+DIAMOND index-chunks parameter (default: 4). Set to 1
+on high memory machines. The parameter has no effect
+on temporary disk space usage.
+--top
+DIAMOND top parameter [0-50] (default: 50). Governs
+hits within range of best hit that are written to the
+alignment file. This is not the [-r / --range]
+parameter!
+Setting the DIAMOND --top parameter
+You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.
+You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this!
+If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.
+]]></token>
+<!--
+  Citations shared by the tools; the yield lets a caller append further citation elements.
+  A citation of type "doi" carries the bare DOI rather than a resolver URL.
+-->
+<xml name="citations">
+    <citations>
+        <citation type="doi">10.1101/072868</citation>
+        <citation type="doi">10.1186/s13059-019-1817-x</citation>
+        <citation type="doi">10.1038/nmeth.3176</citation>
+        <citation type="doi">10.1186/1471-2105-11-119</citation>
+        <yield/>
+    </citations>
+</xml>
+</macros>

Mercurial > repos > iuc > cat_bins

comparison macros.xml @ 0:0094893f5001 draft