Mercurial > repos > galaxy-australia > smudgeplot
comparison smudgeplot.xml @ 0:19462781bfe4 draft
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7a48d3cb54e682fb7dad612417391f327288dd0a-dirty"
author | galaxy-australia |
---|---|
date | Wed, 27 Apr 2022 06:10:53 +0000 |
parents | |
children | 28f5d7ea992a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:19462781bfe4 |
---|---|
1 <tool id="smudgeplot" name="Smudgeplot" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@"> | |
2 <description> - inference of ploidy and heterozygosity structure using whole genome sequencing</description> | |
3 | |
4 <macros> | |
5 <token name="@TOOL_VERSION@">0.2.5</token> | |
6 <token name="@VERSION_SUFFIX@">1</token> | |
7 </macros> | |
8 | |
9 <xrefs> | |
10 <xref type="bio.tools">smudgeplots</xref> | |
11 </xrefs> | |
12 | |
13 <requirements> | |
14 <requirement type="package" version="@TOOL_VERSION@">smudgeplot</requirement> | |
15 <requirement type="package" version="2.3.0">kmer-jellyfish</requirement> | |
16 </requirements> | |
17 | |
18 <command detect_errors="exit_code"><![CDATA[ | |
19 ## Cheetah template for the job command line: count kmers (or accept a ready-made dump), extract heterozygous kmer pairs, then plot. | |
20 #if $file.input.input_select == 'reads' | |
21 | |
22 ## ~~~~~~~~~~~~~~~ Generate kmer-dump with presets ~~~~~~~~~~~~~~~~~~~~~ | |
23 | |
24 ## Jellyfish kmer count | |
25 ## --------------------------------------------------------------------- | |
26 ## Compression is detected from the first dataset only; every selected file is decompressed into ONE jellyfish run, so the pipe must follow the loop. | |
27 #if $file.input.reads[0].is_of_type("fastq.gz") or $file.input.reads[0].is_of_type("fasta.gz") | |
28 gunzip -c | |
29 #for $f in $file.input.reads | |
30 #if $f | |
31 '$f' | |
32 #end if | |
33 #end for | |
34 | jellyfish count -m 21 -t \${GALAXY_SLOTS:-4} -s 1M -o 1_counts.jf -C /dev/stdin | |
35 | |
36 #else | |
37 jellyfish count -m 21 -t \${GALAXY_SLOTS:-4} -s 1M -o 1_counts.jf -C | |
38 #for $f in $file.input.reads | |
39 #if $f | |
40 '$f' | |
41 #end if | |
42 #end for | |
43 #end if | |
44 | |
45 && jellyfish histo 1_counts.jf > 1_kmer_k21.hist | |
46 | |
47 ## Calculate lower and upper kmer count cutoffs | |
48 ## --------------------------------------------------------------------- | |
49 ## The parameter wrapper is never None; an unset optional integer renders as an empty string, so test the string form to reach the estimate branch. | |
50 #if str($file.input.lower_cutoff) != '' | |
51 && L=$file.input.lower_cutoff | |
52 #else | |
53 && L=\$(smudgeplot.py cutoff 1_kmer_k21.hist L) | |
54 #end if | |
55 | |
56 #if str($file.input.upper_cutoff) != '' | |
57 && U=$file.input.upper_cutoff | |
58 #else | |
59 && U=\$(smudgeplot.py cutoff 1_kmer_k21.hist U) | |
60 #end if | |
61 | |
62 ## --------------------------------------------------------------------- | |
63 ## Dump and extract coverage | |
64 | |
65 && echo "Dump with cutoffs L=\$L, U=\$U" | |
66 && jellyfish dump -c -L \$L -U \$U 1_counts.jf > 2_dump.jf | |
67 && smudgeplot.py hetkmers -o 2_kmer_pairs 2_dump.jf | |
68 | |
69 #else | |
70 | |
71 ## ~~~~~~~~~~~~~~~~~~~ Use provided kmer dump ~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
72 | |
73 smudgeplot.py hetkmers -o 2_kmer_pairs '$file.input.dump' | |
74 | |
75 #end if | |
76 | |
77 ## --------------------------------------------------------------------- | |
78 ## Plot | |
79 | |
80 && smudgeplot.py plot 2_kmer_pairs_coverages.tsv -o my_genome | |
81 | |
82 ]]></command> | |
83 | |
84 <inputs> | |
85 <section name="file" title="File inputs" expanded="true"> | |
86 <conditional name="input"> | |
87 <param | |
88 name="input_select" type="select" label="Select input type" | |
89 help="For more control, create your own Kmer dump using Jellyfish. | |
90 See Smudgeplot on GitHub for more details: https://github.com/KamilSJaron/smudgeplot" | |
91 > | |
92 <option value="reads" selected="true"> Sequencing reads </option> | |
93 <option value="dump"> Kmer dump file </option> | |
94 </param> | |
95 <!-- Reads mode: kmers are counted with Jellyfish inside this tool. Gzipped datatypes are listed explicitly so the gunzip branch of the command can be reached. --> | |
96 <when value="reads"> | |
97 <param | |
98 name="reads" type="data" format="fastq,fastq.gz,fasta,fasta.gz" | |
99 label="Sequencing reads" multiple="true" | |
100 help="Sequencing reads corresponding to your genome. | |
101 Don't worry about read pairing as it is not used in Kmer-counting. | |
102 If selecting multiple datasets, please do not mix datatypes!" | |
103 /> | |
104 <!-- Optional cutoffs: when left empty, the command estimates them with smudgeplot cutoff. --> | |
105 <param | |
106 name="lower_cutoff" | |
107 label="Lower kmer cutoff" | |
108 type="integer" | |
109 optional="true" | |
110 help="Optionally set a manual lower limit for filtering kmers with | |
111 smudgeplot hetkmers. If no value is set, a cutoff will be | |
112 estimated with smudgeplot cutoff. Use the GenomeScope tool to | |
113 visualize your kmer histogram when choosing cutoff values." | |
114 /> | |
115 | |
116 <param | |
117 name="upper_cutoff" | |
118 label="Upper kmer cutoff" | |
119 type="integer" | |
120 optional="true" | |
121 help="Optionally set a manual upper limit for filtering kmers with | |
122 smudgeplot hetkmers. If no value is set, a cutoff will be | |
123 estimated with smudgeplot cutoff. Use the GenomeScope tool to | |
124 visualize your kmer histogram when choosing cutoff values." | |
125 /> | |
126 </when> | |
127 <!-- Dump mode: a pre-computed kmer dump is passed straight to smudgeplot hetkmers. --> | |
128 <when value="dump"> | |
129 <param | |
130 name="dump" type="data" format="txt" | |
131 label="Kmer dump" | |
132 help="Upload your own Kmer dump file created with the Jellyfish or KMC tool. | |
133 This enables control over kmer-counting parameters." | |
134 /> | |
135 </when> | |
136 </conditional> | |
137 </section> | |
138 <!-- Switches referenced by the output filters to decide which optional result files are kept. --> | |
139 <param name="table_output" type="boolean" label="Output summary table"></param> | |
140 <param name="verbose_output" type="boolean" label="Output verbose summary"></param> | |
141 <param name="warnings_output" type="boolean" label="Output genome warnings"></param> | |
142 </inputs> | |
143 <!-- Outputs are collected from the job working directory; the file names carry the my_genome prefix given to the plot step. The two plots are always declared; the summary table, verbose summary and warnings each carry a filter naming the matching boolean input. --> | |
144 <outputs> | |
145 <data | |
146 name="smudgeplot" format="png" | |
147 from_work_dir="my_genome_smudgeplot.png" | |
148 label="${tool.name} on ${on_string}: Smudgeplot" | |
149 /> | |
150 <data | |
151 name="smudgeplot_log" format="png" | |
152 from_work_dir="my_genome_smudgeplot_log10.png" | |
153 label="${tool.name} on ${on_string}: Smudgeplot (log10)" | |
154 /> | |
155 <data | |
156 name="genome_summary" format="tabular" | |
157 from_work_dir="my_genome_summary_table.tsv" | |
158 label="${tool.name} on ${on_string}: Genome summary table" | |
159 > | |
160 <filter>table_output</filter> | |
161 </data> | |
162 <data | |
163 name="genome_summary_verbose" format="txt" | |
164 from_work_dir="my_genome_verbose_summary.txt" | |
165 label="${tool.name} on ${on_string}: Genome verbose summary" | |
166 > | |
167 <filter>verbose_output</filter> | |
168 </data> | |
169 <data | |
170 name="genome_warnings" format="txt" | |
171 from_work_dir="my_genome_warnings.txt" | |
172 label="${tool.name} on ${on_string}: Genome warnings" | |
173 > | |
174 <filter>warnings_output</filter> | |
175 </data> | |
176 </outputs> | |
177 <!-- Single test case: reads mode with one FASTA dataset and manual cutoffs (L=2, U=25). Only the main smudgeplot image is compared; the gzip, automatic-cutoff and dump paths are not exercised here. --> | |
178 <tests> | |
179 <test> | |
180 <param name="input_select" value="reads"/> | |
181 <param name="reads" value="test_reads.fasta" ftype="fasta"/> | |
182 <param name="lower_cutoff" value="2"/> | |
183 <param name="upper_cutoff" value="25"/> | |
184 <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png"/> | |
185 </test> | |
186 </tests> | |
187 | |
188 <help><![CDATA[ | |
189 | |
190 .. class:: infomark | |
191 | |
192 **What it does** | |
193 | |
194 This tool extracts heterozygous kmer pairs from kmer count databases and performs gymnastics with them. We are able to disentangle genome structure by comparing the sum of kmer pair coverages (CovA + CovB) to their relative coverage (CovB / (CovA + CovB)). Such an approach also allows us to analyze obscure genomes with duplications, various ploidy levels, etc. | |
195 | |
196 Smudgeplots are computed from raw or even better from trimmed reads and show the haplotype structure using heterozygous kmer pairs. For example: | |
197 | |
198 .. image:: https://raw.githubusercontent.com/usegalaxy-au/tools-au/master/tools/smudgeplot/static/img/smudge.png | |
199 :height: 520 | |
200 :alt: Smudge plot image | |
201 | |
202 Every haplotype structure has a unique smudge on the graph and the heat of the smudge indicates how frequently the haplotype structure is represented in the genome compared to the other structures. The image above is an ideal case, where the sequencing coverage is sufficient to beautifully separate all the smudges, providing very strong and clear evidence of triploidy. | |
203 | |
204 Please see `Smudgeplot on GitHub <https://github.com/KamilSJaron/smudgeplot>`_ | |
205 for further documentation and tutorials. | |
206 | |
207 **Inputs** | |
208 | |
209 You have two choices when running Smudgeplot in Galaxy: | |
210 | |
211 1. Input reads file(s) for default kmer-counting with Jellyfish | |
212 | |
213 This should be at least one file providing coverage of your genome of interest. | |
214 The tool accepts compressed (.gz) inputs. If choosing this option, you can | |
215 (optionally) specify manual cutoff values for the kmer dump step. The Smudgeplot | |
216 docs suggest that you can use GenomeScope on a kmer histogram in order to choose | |
217 reasonable lower and upper cutoff values. | |
218 | |
219 2. Input your own kmer dump file for more control of kmer counting parameters | |
220 | |
221 This file would be created by running ``jellyfish count`` and then ``jellyfish dump`` - the process is well described | |
222 `on GitHub <https://github.com/KamilSJaron/smudgeplot>`_. | |
223 | |
224 **Outputs** | |
225 | |
226 - ``my_genome_smudgeplot.png`` smudgeplot image | |
227 - ``my_genome_smudgeplot_log10.png`` smudgeplot with log scale | |
228 - ``my_genome_summary_table.tsv`` summarized genome statistics (optional) | |
229 - ``my_genome_verbose_summary.txt`` detailed genome statistics (optional) | |
230 - ``my_genome_warnings.txt`` warnings emitted from the Smudgeplot tool (optional) | |
231 | |
232 **Default operation** | |
233 | |
234 If choosing reads as the input, a default kmer counting procedure will be used | |
235 to create a kmer dump. This default process is summarized as follows: | |
236 | |
237 - ``jellyfish count -C -m 21 -s 1M -o counts.jf <reads>`` | |
238 - ``jellyfish histo counts.jf > counts.hist`` | |
239 - ``smudgeplot.py cutoff counts.hist`` to get kmer cutoff values (U & L) | |
240 - ``jellyfish dump -c -L <L> -U <U> counts.jf > dump.jf`` | |
241 | |
242 The kmer dump file is then used to create a smudgeplot: | |
243 | |
244 - ``smudgeplot.py hetkmers -o kmer_pairs dump.jf`` | |
245 - ``smudgeplot.py plot kmer_pairs_coverages.tsv -o my_genome`` | |
246 | |
247 ]]></help> | |
248 </tool> |