Mercurial > repos > iuc > checkm_taxonomy_wf
diff taxonomy_wf.xml @ 0:91230b9141c1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
| author | iuc |
|---|---|
| date | Fri, 29 Jul 2022 20:32:07 +0000 |
| parents | |
| children | 2e995a9aa00e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/taxonomy_wf.xml Fri Jul 29 20:32:07 2022 +0000 @@ -0,0 +1,227 @@ +<tool id="checkm_taxonomy_wf" name="CheckM taxonomy_wf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description> + Analyze all genome bins with the same marker set + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <expand macro="version"/> + <command detect_errors="exit_code"><![CDATA[ +@BIN_INPUTS@ + +checkm taxonomy_wf + '$rank_taxon.rank' + '$rank_taxon.taxon.fields.value' + 'bins' + 'output' + $analyze.ali + $analyze.nt + $analyze.genes + $qa.individual_markers + $qa.skip_adj_correction + $qa.skip_pseudogene_correction + --aai_strain $qa.aai_strain + $qa.ignore_thresholds + --e_value $qa.e_value + --length $qa.length + --file '$results' + --tab_table + --extension 'fasta' + --threads \${GALAXY_SLOTS:-1} + ]]></command> + <inputs> + <expand macro="rank_taxon"/> + <expand macro="bin_inputs"/> + <section name="analyze" title="Marker gene identification"> + <expand macro="analyze_params" /> + </section> + <section name="qa" title="Bin assessment"> + <expand macro="qa_params" /> + </section> + <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs"> + <option value="marker_file">Marker genes</option> + <option value="hmmer_analyze">Marker gene HMM hits to each bin</option> + <option value="bin_stats_analyze">Marker gene bin stats</option> + <option value="checkm_hmm_info">Marker gene HMM info for each bin</option> + <expand macro="analyze_extra_output_options" /> + <option value="bin_stats_ext">Marker gene bin extensive stats</option> + <expand macro="qa_extra_output_options" /> + </param> + </inputs> + <outputs> + <data name="results" format="tabular" label="${tool.name} on ${on_string}: Bin statistics"/> + <!-- taxon_set outputs --> + <data name="marker_file" format="tabular" from_work_dir="output/*.ms" label="${tool.name} on ${on_string}: Marker genes"> + <filter>'marker_file' in extra_outputs</filter> + </data> + <!--analyze outputs--> + <collection name="hmmer_analyze" type="list" label="${tool.name} on ${on_string}: Marker gene HMM hits to each bin"> + <filter>'hmmer_analyze' in extra_outputs</filter> + <discover_datasets pattern="(?P<designation>.*)/hmmer\.analyze\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> + </collection> + <data name="bin_stats_analyze" format="tabular" from_work_dir="output/storage/bin_stats.analyze.tsv" label="${tool.name} on ${on_string}: Marker gene bin stats"> + <filter>'bin_stats_analyze' in extra_outputs</filter> + </data> + <data name="checkm_hmm_info" format="zip" from_work_dir="output/storage/checkm_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Marker gene HMM info for each bin" > + <filter>'checkm_hmm_info' in extra_outputs</filter> + </data> + <collection name="hmmer_analyze_ali" type="list" label="${tool.name} on ${on_string}: HMMER alignment file for each bin"> + <filter>analyze['ali'] and 'hmmer_analyze_ali' in extra_outputs</filter> + <discover_datasets pattern="(?P<designation>.*)/hmmer\.analyze\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> + </collection> + <!--qa outputs--> + <data name="bin_stats_ext" format="tabular" from_work_dir="output/storage/bin_stats_ext.tsv" label="${tool.name} on ${on_string}: Marker gene bin extensive stats"> + <filter>'bin_stats_ext' in extra_outputs</filter> + </data> + <expand macro="qa_extra_outputs" /> + <data name="marker_gene_stats" format="tabular" from_work_dir="output/storage/marker_gene_stats.tsv" label="${tool.name} on ${on_string}: Marker gene statistics"> + <filter>'marker_gene_stats' in extra_outputs</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="1"> + <conditional name="bins"> + <param name="select" value="collection"/> + <param name="bins_coll"> + <collection type="list"> + <element name="637000110" ftype="fasta" value="637000110.fna"/> + </collection> + </param> + </conditional> + <conditional name="rank_taxon"> + <param name="rank" value="life"/> + <param name="taxon" value="Prokaryote"/> + </conditional> + <section name="analyze"> + <param name="ali" value="false"/> + <param name="nt" value="false"/> + <param name="genes" value="false"/> + </section> + <section name="qa"> + <param name="individual_markers" value="false"/> + <param name="skip_adj_correction" value="false"/> + <param name="skip_pseudogene_correction" value="false"/> + <param name="aai_strain" value="0.9"/> + <param name="ignore_thresholds" value="false"/> + <param name="e_value" value="1e-10"/> + <param name="length" value="0.7"/> + </section> + <param name="extra_outputs" value=""/> + <output name="results" ftype="tabular"> + <assert_contents> + <has_text text="637000110"/> + <has_text text="Marker lineage"/> + <has_text text="Prokaryote"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="8"> + <conditional name="bins"> + <param name="select" value="collection"/> + <param name="bins_coll"> + <collection type="list"> + <element name="637000110" ftype="fasta" value="637000110.fna"/> + </collection> + </param> + </conditional> + <conditional name="rank_taxon"> + <param name="rank" value="life"/> + <param name="taxon" value="Prokaryote"/> + </conditional> + <section name="analyze"> + <param name="ali" value="true"/> + <param name="nt" value="false"/> + <param name="genes" value="false"/> + </section> + <section name="qa"> + <param name="individual_markers" value="false"/> + <param name="skip_adj_correction" value="false"/> + <param name="skip_pseudogene_correction" value="false"/> + <param name="aai_strain" value="0.9"/> + <param name="ignore_thresholds" value="false"/> + <param name="e_value" value="1e-10"/> + <param name="length" value="0.7"/> + </section> + <param name="extra_outputs" value="marker_file,hmmer_analyze,hmmer_analyze_ali,bin_stats_analyze,bin_stats_ext,checkm_hmm_info,marker_gene_stats"/> + <output name="results" ftype="tabular"> + <assert_contents> + <has_text text="637000110"/> + <has_text text="Marker lineage"/> + <has_text text="Prokaryote"/> + </assert_contents> + </output> + <output name="marker_file" ftype="tabular"> + <assert_contents> + <has_text text="[Taxon Marker File]"/> + <has_text text="Prokaryote"/> + <has_text text="PF01000.21"/> + </assert_contents> + </output> + <output_collection name="hmmer_analyze" count="1"> + <element name="637000110" ftype="txt"> + <assert_contents> + <has_text text="target name"/> + <has_text text="AC_000091_570"/> + </assert_contents> + </element> + </output_collection> + <output name="bin_stats_analyze" ftype="tabular"> + <assert_contents> + <has_text text="637000110"/> + <has_text text="GC"/> + <has_text text="GC std"/> + </assert_contents> + </output> + <output name="checkm_hmm_info" ftype="zip"> + <assert_contents> + <has_size value="4373" delta="100"/> + </assert_contents> + </output> + <output_collection name="hmmer_analyze_ali" count="1"> + <element name="637000110" ftype="txt"> + <assert_contents> + <has_text text="hmmsearch"/> + <has_text text="Query"/> + <has_text text="TOBE_2"/> + </assert_contents> + </element> + </output_collection> + <output name="bin_stats_ext" ftype="tabular"> + <assert_contents> + <has_text text="637000110"/> + <has_text text="marker lineage"/> + </assert_contents> + </output> + <output name="marker_gene_stats" ftype="tabular"> + <assert_contents> + <has_text text="637000110"/> + <has_text text="AC_000091_79"/> + <has_text text="PF00318.15"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +@HELP_HEADER@ + +This command runs recommended workflow to analyze all genome bins with the same marker set. A common example would be a set of genomes from the same taxonomic group. + +The workflow for using a taxonomic-specific marker set consists of 3 mandatory (M) steps and 1 recommended (R) step: + +- (R) The taxon_list command produces a table indicating all taxa for which a marker set can be produced. All support taxa at a given taxonomic rank can be produced by passing taxon_list the --rank flag +- (M) The taxon_set command is used to produce marker sets for a specific taxon: +- (M) The analyze command identifies marker genes within each genome bin and estimate completeness and contamination. All putative genomes to be analyzed must be provided. +- (M) The qa command is used to produce different tables summarizing the quality of each genome bin. + +Inputs +====== + + +Outputs +======= + + ]]></help> + <expand macro="citations"/> +</tool>
