comparison drep_dereplicate.xml @ 1:ef7cd2e7bc05 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/drep commit 5e6e589002d554be180e575080e9ad66cc78ed74"
author iuc
date Sat, 12 Feb 2022 17:40:42 +0000
parents 8dfcdbeaeed8
children 368cb4bef9d8
comparison
equal deleted inserted replaced
0:8dfcdbeaeed8 1:ef7cd2e7bc05
1 <tool id="drep_dereplicate" name="dRep dereplicate" version="@VERSION@.0" python_template_version="3.5"> 1 <tool id="drep_dereplicate" name="dRep dereplicate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" python_template_version="3.5">
2 <description>De-replicate a list of genomes</description> 2 <description>De-replicate a list of genomes</description>
3 <expand macro="biotools" />
3 <macros> 4 <macros>
4 <import>macros.xml</import> 5 <import>macros.xml</import>
5 </macros> 6 </macros>
6 <expand macro="requirements" /> 7 <expand macro="requirements">
8 <requirement type="package" version="1.1.3">checkm-genome</requirement>
9 </expand>
7 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
8 @PREPARE_GENOMES@ 11 @PREPARE_GENOMES@
9 dRep dereplicate outdir 12 dRep dereplicate outdir
10 @FILTER_OPTIONS@ 13 @GENOMES@
11 @GENOME_COMPARISON_OPTIONS@ 14 @FILTER_OPTIONS@
12 @CLUSTERING_OPTIONS@ 15 @QUALITY_ASSESSMENT_OPTIONS@
13 @SCORING_OPTIONS@ 16 @COMPARISON_CLUSTERING_OPTIONS@
14 @TAXONOMY_OPTIONS@ 17 @SCORING_OPTIONS@
15 @WARNING_OPTIONS@ 18 @WARNING_OPTIONS@
16 @GENOMES@ 19 --debug
17 --debug 20 || (rc=\$?;
18 || (rc=\$?; 21 ls -ltr `find outdir -type f`;
19 ls -ltr `find outdir -type f`; 22 cat outdir/data/checkM/checkM_outdir/checkm.log;
20 cat outdir/data/checkM/checkM_outdir/checkm.log; 23 cat outdir/log/logger.log;
21 cat outdir/log/logger.log; 24 exit \$rc)
22 exit \$rc)
23 ]]></command> 25 ]]></command>
24 <inputs> 26 <inputs>
25 <expand macro="genomes"/> 27 <expand macro="genomes"/>
26 <expand macro="filtering_options"/> 28 <expand macro="filtering_options"/>
27 <expand macro="genome_comparison_options"/> 29 <expand macro="quality_assessment_options"/>
28 <expand macro="clustering_options"/> 30 <expand macro="comparison_clustering_options"/>
29 <expand macro="scoring_options"/> 31 <expand macro="scoring_options"/>
30 <expand macro="taxonomy_options"/>
31 <expand macro="warning_options"/> 32 <expand macro="warning_options"/>
32 <expand macro="select_drep_outputs"/> 33 <expand macro="select_drep_outputs"/>
33 </inputs> 34 </inputs>
34 <outputs> 35 <outputs>
35 <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes"> 36 <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes">
36 <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext='fasta'/> 37 <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext='fasta'/>
37 </collection> 38 </collection>
38 <expand macro="drep_outputs" /> 39 <expand macro="drep_outputs" />
39 </outputs> 40 </outputs>
40 <tests> 41 <tests>
41 <expand macro="test_defaults_log"> 42 <test expect_num_outputs="8">
42 <has_text text="dRep dereplicate finished" /> 43 <expand macro="test_string_inputs"/>
43 </expand> 44 <expand macro="test_default_filtering_options"/>
44 <test> 45 <expand macro="test_default_quality_assessment_options"/>
45 <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/> 46 <expand macro="test_default_comparison_clustering_options"/>
46 <conditional name="filter"> 47 <expand macro="test_default_scoring_options"/>
47 <param name="set_options" value="yes"/> 48 <expand macro="test_default_warning_options"/>
48 <conditional name="quality"> 49 <expand macro="test_default_select_drep_outputs"/>
49 <param name="source" value="checkm"/> 50 <expand macro="test_log_output">
50 <param name="checkM_method" value="taxonomy_wf"/> 51 <has_text text="dRep dereplicate finished" />
51 </conditional> 52 </expand>
52 </conditional> 53 </test>
53 <output name="log"> 54 <test expect_num_outputs="8">
54 <assert_contents> 55 <expand macro="test_integer_inputs"/>
55 <has_text text="dRep dereplicate finished" /> 56 <expand macro="test_default_filtering_options"/>
56 </assert_contents> 57 <expand macro="test_default_quality_assessment_options"/>
57 </output> 58 <expand macro="test_default_comparison_clustering_options"/>
59 <expand macro="test_default_scoring_options"/>
60 <expand macro="test_default_warning_options"/>
61 <expand macro="test_default_select_drep_outputs"/>
62 <expand macro="test_log_output">
63 <has_text text="dRep dereplicate finished" />
64 </expand>
58 </test> 65 </test>
59 </tests> 66 </tests>
60 <help><![CDATA[ 67 <help><![CDATA[
61 **dRep dereplicate** 68 **dRep dereplicate**
62 69
63 `dRep <https://drep.readthedocs.io/en/latest/overview.html>`_ performs rapid pair-wise comparison of genome sets. 70 `dRep <https://drep.readthedocs.io/en/latest/overview.html>`_ performs rapid pair-wise comparison of genome sets.
64
65
66
67
68 71
69 `De-replication <https://drep.readthedocs.io/en/latest/overview.html#genome-de-replication>`_ is the process of identifying sets of genomes that are the “same” in a list of genomes, and removing all but the “best” genome from each redundant set. How similar genomes need to be to be considered “same”, how to determine which genome is “best”, and other important decisions are discussed in `Choosing parameters. <https://drep.readthedocs.io/en/latest/choosing_parameters.html>`_ Detailed options for each module are described at: https://drep.readthedocs.io/en/latest/module_descriptions.html 72 `De-replication <https://drep.readthedocs.io/en/latest/overview.html#genome-de-replication>`_ is the process of identifying sets of genomes that are the “same” in a list of genomes, and removing all but the “best” genome from each redundant set. How similar genomes need to be to be considered “same”, how to determine which genome is “best”, and other important decisions are discussed in `Choosing parameters. <https://drep.readthedocs.io/en/latest/choosing_parameters.html>`_ Detailed options for each module are described at: https://drep.readthedocs.io/en/latest/module_descriptions.html
70 73
71 A common use for genome de-replication is the case of individual assembly of metagenomic data. If metagenomic samples are collected in a series, a common way to assemble the short reads is with a “co-assembly”. That is, combining the reads from all samples and assembling them together. The problem with this is assembling similar strains together can severely fragment assemblies, precluding recovery of a good genome bin. An alternative option is to assemble each sample separately, and then “de-replicate” the bins from each assembly to make a final genome set. 74 A common use for genome de-replication is the case of individual assembly of metagenomic data. If metagenomic samples are collected in a series, a common way to assemble the short reads is with a “co-assembly”. That is, combining the reads from all samples and assembling them together. The problem with this is assembling similar strains together can severely fragment assemblies, precluding recovery of a good genome bin. An alternative option is to assemble each sample separately, and then “de-replicate” the bins from each assembly to make a final genome set.
72 75
85 88
86 89
87 **OUTPUTS** 90 **OUTPUTS**
88 91
89 - `Figures <https://drep.readthedocs.io/en/latest/example_output.html#figures>`_ that show the relationship of the Genome inputs. 92 - `Figures <https://drep.readthedocs.io/en/latest/example_output.html#figures>`_ that show the relationship of the Genome inputs.
90 - `Warnings <https://drep.readthedocs.io/en/latest/example_output.html#warnings>`_ report two things: de-replicated genome similarity and secondary clusters that were almost different. 93 - `Warnings <https://drep.readthedocs.io/en/latest/example_output.html#warnings>`_ report two things: de-replicated genome similarity and secondary clusters that were almost different.
91 - A Dataset collection of the “best” genome of each secondary cluster. 94 - A Dataset collection of the “best” genome of each secondary cluster.
92 - `Tables from intermediate steps <https://drep.readthedocs.io/en/latest/advanced_use.html>`_ 95 - `Tables from intermediate steps <https://drep.readthedocs.io/en/latest/advanced_use.html>`_
93 96
94 * Chdb.csv # CheckM results for Bdb 97 * Chdb.csv # CheckM results for Bdb
95 * Widb.csv # Winning genomes' checkM information 98 * Widb.csv # Winning genomes' checkM information
96 99
97 100
98 ]]></help> 101 ]]></help>
99 <expand macro="citations" /> 102 <expand macro="citations" />
100 </tool> 103 </tool>