Mercurial > repos > iuc > cat_prepare
view macros.xml @ 1:8315b5cebb82 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit b6c674376eade4fdf9ffb31380f6689ede84a091"
author | iuc |
---|---|
date | Wed, 08 Jan 2020 13:04:08 -0500 |
parents | b6c5e7343617 |
children | a40bbd70b8b1 |
line wrap: on
line source
<macros>
    <!--
        Shared macros for the CAT / BAT (Contig / Bin Annotation Tool) Galaxy wrappers.
        "xml" macros are pulled into a tool with an expand element; "token" macros are
        substituted textually wherever their name appears in the tool.
        The CDATA token bodies are Cheetah command fragments or reStructuredText help,
        so their line breaks are significant and must be kept when editing.
    -->

    <!-- Version of the CAT package requested by the "requirements" macro. -->
    <token name="@VERSION@">5.0.3</token>

    <!-- Package requirements; callers may yield additional requirement elements. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">cat</requirement>
            <yield/>
        </requirements>
    </xml>

    <xml name="version_command">
        <version_command><![CDATA[CAT --version]]></version_command>
    </xml>

    <!-- Sub-directory names joined onto extra_files_path of a history CAT database dataset. -->
    <token name="@DATABASE_FOLDER@">CAT_database</token>
    <token name="@TAXONOMY_FOLDER@">taxonomy</token>

    <!-- Database selector: a cached entry of the cat_database data table, or a history dataset. -->
    <xml name="cat_db">
        <conditional name="db">
            <param name="db_src" type="select" label="CAT database (--database_folder,--taxonomy_folder) from">
                <option value="cached">local cached database</option>
                <option value="history">history</option>
            </param>
            <when value="cached">
                <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator">
                    <options from_data_table="cat_database">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No CAT database is available." />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/>
            </when>
        </conditional>
    </xml>

    <!-- Command fragment: database and taxonomy folder arguments for the selected database. -->
    <token name="@CAT_DB@"><![CDATA[
    #if $db.db_src == 'cached':
        --database_folder '$db.cat_builtin.fields.database_folder'
        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
    #else
        #import os.path
        #set $catdb = $db.cat_db.extra_files_path
        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
    #end if
    ]]></token>

    <!-- Command fragment: taxonomy folder argument only (used by the add_names steps). -->
    <token name="@CAT_TAXONOMY@"><![CDATA[
    #if $db.db_src == 'cached':
        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
    #else
        #import os.path
        #set $catdb = $db.cat_db.extra_files_path
        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
    #end if
    ]]></token>

    <!-- Test helper: selects the cached database entry named CAT_prepare_test. -->
    <xml name="test_catdb">
        <conditional name="db">
            <param name="db_src" value="cached"/>
            <param name="cat_builtin" value="CAT_prepare_test"/>
        </conditional>
    </xml>

    <!-- Optional reuse of the protein prediction and alignment files of a previous run. -->
    <xml name="use_intermediates">
        <conditional name="previous">
            <param name="use_previous" type="select" label="Use previous prodigal gene prediction and diamond alignment">
                <help>predicted_proteins.faa and alignment.diamond from previous CAT run.</help>
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--proteins_fasta" type="data" format="fasta" label="prodigal predicted proteins fasta"/>
                <param argument="--diamond_alignment" type="data" format="tabular" label="alignment.diamond file"/>
            </when>
            <when value="no"/>
        </conditional>
    </xml>

    <!-- Command fragment: previous-run inputs (when selected) plus the fixed output prefix. -->
    <token name="@USE_INTERMEDIATES@"><![CDATA[
    #if $previous.use_previous == 'yes'
        --proteins_fasta '$previous.proteins_fasta'
        --diamond_alignment '$previous.diamond_alignment'
    #end if
    --out_prefix 'cat_output'
    ]]></token>

    <!-- Range / fraction parameters; the two variants differ only in their default values. -->
    <xml name="custom_settings_bins">
        <param argument="--range" type="integer" value="5" min="0" max="49" label="range"/>
        <param argument="--fraction" type="float" value="0.3" min="0" max="0.99" label="fraction"/>
    </xml>

    <xml name="custom_settings_contigs">
        <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/>
        <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/>
    </xml>

    <token name="@CUSTOM_SETTINGS@"><![CDATA[
    --range '$range'
    --fraction '$fraction'
    ]]></token>

    <!-- Advanced DIAMOND settings, only shown when the user opts in. -->
    <xml name="diamond_options">
        <conditional name="diamond">
            <param name="set_diamond_opts" type="select" label="Set advanced diamond options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="false" label="Run DIAMOND in sensitive mode (considerably slower)"/>
                <param argument="--block_size" type="float" value="2.0" min="1" max="10" label="DIAMOND block-size parameter." help="lower will decrease memory and temporary disk space usage, higher will increase performance."/>
                <param argument="--index_chunks" type="integer" value="4" min="1" max="10" label="DIAMOND index-chunks parameter" help="Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage."/>
                <param argument="--top" type="integer" value="50" min="1" max="50" label="DIAMOND top parameter" help="Governs hits within range of best hit that are written to the alignment file. This implies you know what you are doing."/>
            </when>
            <when value="no"/>
        </conditional>
    </xml>

    <!-- Command fragment: the top argument (with its acknowledgement flag) is only emitted for values below 50. -->
    <token name="@DIAMOND_OPTIONS@"><![CDATA[
    #if $diamond.set_diamond_opts == 'yes':
        $diamond.sensitive
        --block_size '$diamond.block_size'
        --index_chunks '$diamond.index_chunks'
        #if $diamond.top < 50:
            --I_know_what_Im_doing
            --top '$diamond.top'
        #end if
    #end if
    ]]></token>

    <!-- Boolean switches shared by every add_names variant. -->
    <xml name="add_names_options">
        <param argument="--only_official" type="boolean" truevalue="--only_official" falsevalue="" checked="true" label="Only output official level names."/>
        <param argument="--exclude_scores" type="boolean" truevalue="--exclude_scores" falsevalue="" checked="false" label="Exclude bit-score support scores in the lineage."/>
    </xml>

    <token name="@ADD_NAMES_OPTIONS@"><![CDATA[
    $only_official $exclude_scores
    ]]></token>

    <!-- Selector for which result files get an extra add_names annotation pass. -->
    <xml name="add_names">
        <conditional name="names">
            <param name="add_names" type="select" label="CAT add_names for" help="annotate with taxonomic names.">
                <option value="no">No</option>
                <option value="orf2lca">ORF2LCA.names.txt</option>
                <option value="classification">classification.names.txt</option>
                <option value="both">ORF2LCA.names.txt and classification.names.txt</option>
            </param>
            <when value="no"/>
            <when value="orf2lca">
                <expand macro="add_names_options"/>
            </when>
            <when value="classification">
                <expand macro="add_names_options"/>
            </when>
            <when value="both">
                <expand macro="add_names_options"/>
            </when>
        </conditional>
    </xml>

    <!-- Path of the tabpad.py helper script located in the tool directory. -->
    <token name="@TXT2TSV@">${__tool_directory__}/tabpad.py</token>

    <!-- Command fragment: runs add_names on the selected result files and passes each result through tabpad.py. -->
    <token name="@ADD_NAMES@"><![CDATA[
    #if $names.add_names in ['classification','both']:
        && CAT add_names $names.only_official $names.exclude_scores
        @CAT_TAXONOMY@
        #if $bcat == 'CAT'
            -i 'cat_output.contig2classification.tsv'
        #else
            -i 'cat_output.bin2classification.tsv'
        #end if
        -o 'classification_names.txt'
        && ${__tool_directory__}/tabpad.py -i 'classification_names.txt' -o '$classification_names'
    #end if
    #if $names.add_names in ['orf2lca','both']:
        && CAT add_names $names.only_official $names.exclude_scores
        @CAT_TAXONOMY@
        -i 'cat_output.ORF2LCA.tsv'
        -o 'orf2lca_names.txt'
        && ${__tool_directory__}/tabpad.py -i 'orf2lca_names.txt' -o '$orf2lca_names'
    #end if
    ]]></token>

    <xml name="summarise">
        <param name="summarise" type="select" label="CAT summarise report" help="Report the number of assignments to each taxonomic name">
            <option value="no">No</option>
            <option value="classification">classification.summary.txt</option>
        </param>
    </xml>

    <!--
        Command fragment: runs the summarise step. It reuses the classification names
        output when that was produced with official names only; otherwise it first
        runs a dedicated add_names pass restricted to official names.
    -->
    <token name="@SUMMARISE@"><![CDATA[
    #if $summarise in ['classification']:
        #if $names.add_names in ['classification','both'] and $names.only_official:
            #set $summary_input = $classification_names
        #else
            #set $summary_input = 'classification_offical_names'
            && CAT add_names --only_official
            @CAT_TAXONOMY@
            #if $bcat == 'CAT'
                -i 'cat_output.contig2classification.tsv'
            #else
                -i 'cat_output.bin2classification.tsv'
            #end if
            -o '$summary_input'
        #end if
        && CAT summarise
        #if $bcat == 'CAT'
            -c '$contigs_fasta'
        #end if
        -i '$summary_input'
        -o 'classification_summary.txt'
        && ${__tool_directory__}/tabpad.py -i 'classification_summary.txt' -o '$classification_summary'
    #end if
    ]]></token>

    <!-- Output selector common to CAT and BAT; callers yield their tool-specific option. -->
    <xml name="select_outputs">
        <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs">
            <option value="log" selected="true">log</option>
            <option value="predicted_proteins_faa" selected="true">Prodigal predicted_proteins.faa</option>
            <option value="predicted_proteins_gff">Prodigal predicted_proteins.gff</option>
            <option value="alignment_diamond">Diamond blastp alignment.diamond</option>
            <option value="orf2lca" selected="true">ORF2LCA.txt (taxonomic assignment per predicted ORF)</option>
            <yield/>
        </param>
    </xml>

    <!-- Hidden parameters (bcat, seqtype, sum_titles, bin_col) feed the output labels and column names below. -->
    <xml name="select_cat_outputs">
        <param name="bcat" type="hidden" value="CAT"/>
        <param name="seqtype" type="hidden" value="contig"/>
        <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions"/>
        <param name="bin_col" type="hidden" value=""/>
        <expand macro="select_outputs">
            <option value="contig2classification" selected="true">contig2classification.txt (taxonomic assignment per contig)</option>
        </expand>
    </xml>

    <xml name="select_bat_outputs">
        <param name="bcat" type="hidden" value="BAT"/>
        <param name="seqtype" type="hidden" value="bin"/>
        <param name="sum_titles" type="hidden" value="bins"/>
        <param name="bin_col" type="hidden" value="bin,"/>
        <expand macro="select_outputs">
            <option value="bin2classification" selected="true">bin2classification.txt (taxonomic assignment per metagenome assembly)</option>
        </expand>
    </xml>

    <!-- Output datasets; each one is filtered on the output selection and/or the add_names and summarise choices. -->
    <xml name="outputs">
        <data name="log" format="txt" label="${bcat}.log" from_work_dir="cat_output.log">
            <filter>'log' in select_outputs or not select_outputs</filter>
        </data>
        <data name="predicted_proteins_faa" format="fasta" label="${bcat}.predicted_proteins.faa" from_work_dir="cat_output.predicted_proteins.faa">
            <filter>'predicted_proteins_faa' in select_outputs and previous['use_previous'] == 'no'</filter>
        </data>
        <data name="predicted_proteins_gff" format="gff" label="${bcat}.predicted_proteins.gff" from_work_dir="cat_output.predicted_proteins.gff">
            <filter>'predicted_proteins_gff' in select_outputs and previous['use_previous'] == 'no'</filter>
        </data>
        <data name="alignment_diamond" format="tabular" label="${bcat}.alignment.diamond" from_work_dir="cat_output.alignment.diamond">
            <filter>'alignment_diamond' in select_outputs and previous['use_previous'] == 'no'</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
            </actions>
        </data>
        <data name="orf2lca" format="tabular" label="${bcat}.ORF2LCA.txt" from_work_dir="cat_output.ORF2LCA.tsv">
            <filter>'orf2lca' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score" />
            </actions>
        </data>
        <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv">
            <filter>'contig2classification' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" />
            </actions>
        </data>
        <data name="bin2classification" format="tabular" label="${bcat}.bin2classification.txt" from_work_dir="cat_output.bin2classification.tsv">
            <filter>'bin2classification' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" />
            </actions>
        </data>
        <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt">
            <filter>names['add_names'] in ['both','orf2lca']</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" />
            </actions>
        </data>
        <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt">
            <filter>names['add_names'] in ['both','classification']</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" />
            </actions>
        </data>
        <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt">
            <filter>'classification' in summarise</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="4" />
                <action name="column_names" type="metadata" default="rank,clade,number of ${sum_titles}" />
            </actions>
        </data>
    </xml>

    <!-- reStructuredText help fragments shared by the tool help sections. -->
    <token name="@COMMON_HELP@"><![CDATA[
The Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) workflows are described at: https://github.com/dutilh/CAT

  - CAT contigs/CAT bins - runs Prodigal_ prokaryotic protein prediction on the fasta input.
  - CAT contigs/CAT bins - runs Diamond_ to align predicted proteins to the reference proteins in the CAT database.
  - CAT contigs/CAT bins - assigns taxonomic classification to fasta entries and ORFs based on alignments.
  - CAT add_names - annotates outputs with taxonomic names.
  - CAT summarise - reports number of assignments to each taxonomic name.

A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool.

.. _Prodigal: https://github.com/hyattpd/Prodigal
.. _Diamond: https://github.com/bbuchfink/diamond

    ]]></token>

    <token name="@OUTPUTS_HELP@"><![CDATA[
**OUTPUTS**

Any of the files produced by the CAT workflow are available as outputs

  - Prodigal

    - predicted_proteins.faa
    - predicted_proteins.gff

  - Diamond

    - alignment.diamond

  - CAT contigs/bins

    - contigs/bin2classification.txt
    - ORF2LCA.txt

  - CAT add_names (optional)

    - contigs/bin2classification.names.txt
    - ORF2LCA.names.txt

  - CAT summarise (optional)

    - contigs/bin2classification.summary.txt

    ]]></token>

    <token name="@OPTIONS_HELP@"><![CDATA[
Optional arguments:

  -r, --range              cut-off range after alignment [0-49] (default: 10).
  -f, --fraction           fraction of bit-score support for each classification [0-0.99] (default: 0.5).
  -p, --proteins_fasta     Path to predicted proteins fasta file. If supplied, CAT will skip the protein prediction step.
  -a, --diamond_alignment  Path to DIAMOND alignment table. If supplied, CAT will skip the DIAMOND alignment step and directly classify the sequences. A predicted proteins fasta file should also be supplied with argument [-p / --proteins].

DIAMOND specific optional arguments:

  --sensitive      Run DIAMOND in sensitive mode (default: not enabled).
  --block_size     DIAMOND block-size parameter (default: 2.0). Lower numbers will decrease memory and temporary disk space usage.
  --index_chunks   DIAMOND index-chunks parameter (default: 4). Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage.
  --top            DIAMOND top parameter [0-50] (default: 50). Governs hits within range of best hit that are written to the alignment file. This is not the [-r / --range] parameter!

Setting the DIAMOND --top parameter

You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.

You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this!

If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.

    ]]></token>

    <!-- Citations are given as bare DOIs (no resolver URL), as the doi citation type expects. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1101/072868</citation>
            <citation type="doi">10.1186/s13059-019-1817-x</citation>
            <citation type="doi">10.1038/nmeth.3176</citation>
            <citation type="doi">10.1186/1471-2105-11-119</citation>
            <yield />
        </citations>
    </xml>
</macros>