Mercurial > repos > iuc > checkm_lineage_wf
diff lineage_wf.xml @ 0:760dc0c0e689 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author | iuc |
---|---|
date | Fri, 29 Jul 2022 20:30:08 +0000 |
parents | |
children | f0107b9f2dc3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lineage_wf.xml Fri Jul 29 20:30:08 2022 +0000
@@ -0,0 +1,308 @@
+<tool id="checkm_lineage_wf" name="CheckM lineage_wf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <!-- Galaxy wrapper that runs "checkm lineage_wf" on a set of genome bins.
+         The @NAME@ tokens and every <expand macro="..."/> below rely on the imported macros.xml. -->
+    <description>
+        Assessing the completeness and contamination of genome bins using lineage-specific marker sets
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <!-- The command writes the summary table straight to $results; all other files land in the
+         "output" directory and are picked up by the <outputs> section. GALAXY_SLOTS is escaped so
+         that the shell, not the template engine, expands it (falling back to 1 thread). -->
+    <command detect_errors="exit_code"><![CDATA[
+@BIN_INPUTS@
+
+checkm lineage_wf
+    'bins'
+    'output'
+    $tree_analyze.reduced_tree
+    $tree_analyze.ali
+    $tree_analyze.nt
+    $tree_analyze.genes
+    --unique '$lineage_set.unique'
+    --multi '$lineage_set.multi'
+    $lineage_set.force_domain
+    $lineage_set.no_refinement
+    $qa.individual_markers
+    $qa.skip_adj_correction
+    $qa.skip_pseudogene_correction
+    --aai_strain $qa.aai_strain
+    $qa.ignore_thresholds
+    --e_value $qa.e_value
+    --length $qa.length
+    --file '$results'
+    --tab_table
+    --extension 'fasta'
+    --threads \${GALAXY_SLOTS:-1}
+    --pplacer_threads \${GALAXY_SLOTS:-1}
+    ]]></command>
+    <inputs>
+        <expand macro="bin_inputs" />
+        <section name="tree_analyze" title="Bin placement in the genome tree and marker gene identification">
+            <expand macro="tree_params" />
+        </section>
+        <section name="lineage_set" title="Bin lineage-specific marker set inference">
+            <expand macro="lineage_set_params" />
+        </section>
+        <section name="qa" title="Bin assessment">
+            <expand macro="qa_params" />
+        </section>
+        <!-- Each selected value switches on the output whose <filter> tests for that value. -->
+        <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs">
+            <option value="phylo_hmm_info">Phylogenetic HMM model info for each bin</option>
+            <option value="bin_stats_tree">Phylogenetic bin stats</option>
+            <option value="hmmer_tree">Phylogenetic HMM hits to each bin</option>
+            <option value="concatenated_tre">Concatenated tree</option>
+            <option value="concatenated_fasta">Concatenated masked sequences</option>
+            <expand macro="tree_extra_output_options" />
+            <option value="marker_file">Marker genes</option>
+            <option value="hmmer_analyze">Marker gene HMM hits to each bin</option>
+            <option value="bin_stats_analyze">Marker gene bin stats</option>
+            <option value="checkm_hmm_info">Marker gene HMM info for each bin</option>
+            <expand macro="analyze_extra_output_options" />
+            <option value="bin_stats_ext">Marker gene bin extensive stats</option>
+            <expand macro="qa_extra_output_options" />
+        </param>
+    </inputs>
+    <outputs>
+        <data name="results" format="tabular" label="${tool.name} on ${on_string}: Bin statistics"/>
+        <!--tree outputs-->
+        <data name="phylo_hmm_info" format="zip" from_work_dir="output/storage/phylo_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Phylogenetic HMM model info for each bin">
+            <filter>'phylo_hmm_info' in extra_outputs</filter>
+        </data>
+        <data name="bin_stats_tree" format="tabular" from_work_dir="output/storage/bin_stats.tree.tsv" label="${tool.name} on ${on_string}: Phylogenetic bin stats">
+            <filter>'bin_stats_tree' in extra_outputs</filter>
+        </data>
+        <collection name="hmmer_tree" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMM hits to each bin">
+            <filter>'hmmer_tree' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.tree\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <data name="concatenated_fasta" format="fasta" from_work_dir="output/storage/tree/concatenated.fasta" label="${tool.name} on ${on_string}: Concatenated masked sequences">
+            <filter>'concatenated_fasta' in extra_outputs</filter>
+        </data>
+        <!-- NOTE(review): a .tre file is usually Newick; confirm that "phyloxml" is the intended datatype. -->
+        <data name="concatenated_tre" format="phyloxml" from_work_dir="output/storage/tree/concatenated.tre" label="${tool.name} on ${on_string}: Concatenated tree">
+            <filter>'concatenated_tre' in extra_outputs</filter>
+        </data>
+        <!-- Only offered when the 'ali' switch of the tree_analyze section is set. -->
+        <collection name="hmmer_tree_ali" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMMER alignment file for each bin">
+            <filter>tree_analyze['ali'] and 'hmmer_tree_ali' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.tree\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <!-- The filter accepts the output's own name as well as the earlier misspelling
+             "concatenate_pplacer_json", so it works whichever value macros.xml offers. -->
+        <data name="concatenated_pplacer_json" format="json" from_work_dir="output/storage/tree/concatenated.pplacer.json" label="${tool.name} on ${on_string}: Concatenated pplacer JSON">
+            <filter>'concatenated_pplacer_json' in extra_outputs or 'concatenate_pplacer_json' in extra_outputs</filter>
+        </data>
+        <!-- genes.fna holds nucleotide sequences (needs the 'nt' switch); genes.faa holds protein sequences. -->
+        <collection name="genes_fna" type="list" label="${tool.name} on ${on_string}: Nucleotide gene sequences for each bin">
+            <filter>not tree_analyze['genes'] and tree_analyze['nt'] and 'genes_fna' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.fna" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <collection name="genes_faa" type="list" label="${tool.name} on ${on_string}: Protein gene sequences for each bin">
+            <filter>'genes_faa' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.faa" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <collection name="genes_gff" type="list" label="${tool.name} on ${on_string}: Gene feature files for each bin">
+            <filter>not tree_analyze['genes'] and 'genes_gff' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.gff" format="gff" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <!--lineage_set outputs-->
+        <data name="marker_file" format="tabular" from_work_dir="output/lineage.ms" label="${tool.name} on ${on_string}: Marker genes">
+            <filter>'marker_file' in extra_outputs</filter>
+        </data>
+        <!--analyze outputs-->
+        <collection name="hmmer_analyze" type="list" label="${tool.name} on ${on_string}: Marker gene HMM hits to each bin">
+            <filter>'hmmer_analyze' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.analyze\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <data name="bin_stats_analyze" format="tabular" from_work_dir="output/storage/bin_stats.analyze.tsv" label="${tool.name} on ${on_string}: Marker gene bin stats">
+            <filter>'bin_stats_analyze' in extra_outputs</filter>
+        </data>
+        <data name="checkm_hmm_info" format="zip" from_work_dir="output/storage/checkm_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Marker gene HMM info for each bin">
+            <filter>'checkm_hmm_info' in extra_outputs</filter>
+        </data>
+        <collection name="hmmer_analyze_ali" type="list" label="${tool.name} on ${on_string}: HMMER alignment file for each bin">
+            <filter>tree_analyze['ali'] and 'hmmer_analyze_ali' in extra_outputs</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.analyze\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
+        </collection>
+        <!--qa outputs-->
+        <data name="bin_stats_ext" format="tabular" from_work_dir="output/storage/bin_stats_ext.tsv" label="${tool.name} on ${on_string}: Marker gene bin extensive stats">
+            <filter>'bin_stats_ext' in extra_outputs</filter>
+        </data>
+        <expand macro="qa_extra_outputs" />
+
+    </outputs>
+    <tests>
+        <!-- Single bin with no extra outputs selected: only the summary table is expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="individual"/>
+                <param name="bins_ind" ftype="fasta" value="637000110.fna"/>
+            </conditional>
+            <section name="tree_analyze">
+                <param name="reduced_tree" value="true"/>
+                <param name="ali" value="false"/>
+                <param name="nt" value="false"/>
+                <param name="genes" value="false"/>
+            </section>
+            <section name="lineage_set">
+                <param name="unique" value="10"/>
+                <param name="multi" value="10"/>
+                <param name="force_domain" value="false"/>
+                <param name="no_refinement" value="false"/>
+            </section>
+            <section name="qa">
+                <param name="individual_markers" value="false"/>
+                <param name="skip_adj_correction" value="false"/>
+                <param name="skip_pseudogene_correction" value="false"/>
+                <param name="aai_strain" value="0.9"/>
+                <param name="ignore_thresholds" value="false"/>
+                <param name="e_value" value="1e-10"/>
+                <param name="length" value="0.7"/>
+            </section>
+            <param name="extra_outputs" value=""/>
+            <output name="results" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="Marker lineage"/>
+                    <has_text text="k__Bacteria"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Collection input with the 'ali' switch on and eleven extra outputs selected (12 outputs in total). -->
+        <test expect_num_outputs="12">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <section name="tree_analyze">
+                <param name="reduced_tree" value="true"/>
+                <param name="ali" value="true"/>
+                <param name="nt" value="false"/>
+                <param name="genes" value="false"/>
+            </section>
+            <section name="lineage_set">
+                <param name="unique" value="10"/>
+                <param name="multi" value="10"/>
+                <param name="force_domain" value="false"/>
+                <param name="no_refinement" value="false"/>
+            </section>
+            <section name="qa">
+                <param name="individual_markers" value="false"/>
+                <param name="skip_adj_correction" value="false"/>
+                <param name="skip_pseudogene_correction" value="false"/>
+                <param name="aai_strain" value="0.9"/>
+                <param name="ignore_thresholds" value="false"/>
+                <param name="e_value" value="1e-10"/>
+                <param name="length" value="0.7"/>
+            </section>
+            <param name="extra_outputs" value="phylo_hmm_info,bin_stats_tree,hmmer_tree,concatenated_tre,concatenated_fasta,marker_file,hmmer_analyze,bin_stats_analyze,bin_stats_ext,checkm_hmm_info,marker_gene_stats"/>
+            <output name="results" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="Marker lineage"/>
+                    <has_text text="k__Bacteria"/>
+                </assert_contents>
+            </output>
+            <output name="phylo_hmm_info" ftype="zip">
+                <assert_contents>
+                    <has_size value="1575" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="bin_stats_tree" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="Mean scaffold length"/>
+                    <has_text text="Translation table"/>
+                </assert_contents>
+            </output>
+            <output_collection name="hmmer_tree" count="1">
+                <element name="637000110" ftype="txt">
+                    <assert_contents>
+                        <has_text text="target name"/>
+                        <has_text text="AC_000091_79"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="concatenated_fasta" ftype="fasta">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="MLKAGVHFGHQ"/>
+                </assert_contents>
+            </output>
+            <output name="concatenated_tre" ftype="phyloxml">
+                <assert_contents>
+                    <has_text text="IMG_646564547"/>
+                    <has_text text="g__Methanocaldococcus"/>
+                </assert_contents>
+            </output>
+            <output name="marker_file" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Lineage Marker File"/>
+                    <has_text text="637000110"/>
+                    <has_text text="k__Bacteria"/>
+                </assert_contents>
+            </output>
+            <output_collection name="hmmer_analyze" count="1">
+                <element name="637000110" ftype="txt">
+                    <assert_contents>
+                        <has_text text="target name"/>
+                        <has_text text="AC_000091_859"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="bin_stats_analyze" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="GC"/>
+                    <has_text text="GC std"/>
+                </assert_contents>
+            </output>
+            <output name="bin_stats_ext" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="marker lineage"/>
+                </assert_contents>
+            </output>
+            <output name="checkm_hmm_info" ftype="zip">
+                <assert_contents>
+                    <has_size value="17052" delta="200"/>
+                </assert_contents>
+            </output>
+            <output name="marker_gene_stats" ftype="tabular">
+                <assert_contents>
+                    <has_text text="637000110"/>
+                    <has_text text="AC_000091_79"/>
+                    <has_text text="PF00318.15"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This command runs the recommended workflow for assessing the completeness and contamination of genome bins, which relies on lineage-specific marker sets.
+This workflow consists of 4 mandatory (M) steps and 1 recommended (R) step:
+
+- (M) The tree command places genome bins into a reference genome tree
+- (R) The tree_qa command indicates the number of phylogenetically informative marker genes found in each genome bin along with a taxonomic string indicating its approximate placement in the tree.
+
+  If desired, genome bins with few phylogenetically informative marker genes may be removed in order to reduce the computational requirements of the following commands.
+  Alternatively, if only genomes from a particular taxonomic group are of interest these can be moved to a new directory and analyzed separately.
+
+- (M) The lineage_set command creates a marker file indicating lineage-specific marker sets suitable for evaluating each genome.
+- (M) The analyze command identifies marker genes and estimates the completeness and contamination of each genome bin.
+- (M) The qa command can be used to produce different tables summarizing the quality of each genome bin.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>