comparison cactus_cactus.xml @ 0:51c3c42bc644 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cactus commit 827619d22d2931d8fb34ed6844cfa91433e1ac2c
author iuc
date Tue, 06 Feb 2024 00:30:39 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:51c3c42bc644
1 <tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>whole-genome multiple sequence alignment</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9
10 ## Set up seqfile: the guide tree first (between-species mode only), then one 'label path' line per input genome
11 ## Dataset paths and user-supplied text are single-quoted so the shell treats each of them as a single word
12 #if $aln_mode.aln_mode_select == 'interspecies':
13 cat '$aln_mode.in_tree' >> seqfile.txt &&
14 #end if
15 #for $seq in $in_seqs:
16 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext
17 ln -s '$seq.fasta' '$seq_fn' &&
18 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt
19 &&
20 #end for
21
22 ## Run cactus; cores and memory fall back to 4 and 16384 MB when GALAXY_SLOTS / GALAXY_MEMORY_MB are unset
23
24 #if $aln_mode.aln_mode_select == 'intraspecies':
25 ## Run cactus-pangenome
26 ## --reference should be the first argument
27 ## https://github.com/ComparativeGenomicsToolkit/cactus/issues/1093#issuecomment-1620088688
28 cactus-pangenome
29 --reference '$aln_mode.ref_level'
30 --binariesMode local
31 --maxCores \${GALAXY_SLOTS:-4}
32 --maxMemory \${GALAXY_MEMORY_MB:-16384}M
33 --outDir ./
34 --outName alignment
35 jobStore
36 seqfile.txt
37 #else if $aln_mode.aln_mode_select == 'interspecies':
38 ## Run cactus normally
39 cactus
40 --binariesMode local
41 --maxCores \${GALAXY_SLOTS:-4}
42 --maxMemory \${GALAXY_MEMORY_MB:-16384}M
43 --workDir ./
44 jobStore
45 seqfile.txt
46 alignment.full.hal
47 #end if
48
49 ]]></command>
50 <inputs> <!-- alignment mode (with its mode-specific parameter), followed by a repeat of labelled genome FASTA inputs -->
51 <conditional name="aln_mode">
52 <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'">
53 <option value="interspecies" selected="true">Between-species</option>
54 <option value="intraspecies">Within-species</option>
55 </param>
56 <when value="interspecies">
57 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes"/>
58 </when>
59 <when value="intraspecies">
60 <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome label'.">
61 <sanitizer invalid_char="">
62 <valid initial="string.letters,string.digits">
63 <add value="_"/>
64 </valid>
65 </sanitizer> <!-- the regex below is anchored at both ends: Galaxy matches only from the start of the value, so the trailing $ is needed to reject values such as 'a b' -->
66 <validator type="regex" message="Use only letters, digits and underscores">^[0-9a-zA-Z_]+$</validator>
67 </param>
68 </when>
69 </conditional>
70 <repeat name="in_seqs" title="Input genome">
71 <param name="label" type="text" value="" label="Genome label" help="Letters, digits and underscores only (NO SPACES). In Between-species mode this must match a label in the guide tree.">
72 <sanitizer invalid_char="">
73 <valid initial="string.letters,string.digits">
74 <add value="_"/>
75 </valid>
76 </sanitizer> <!-- regex anchored at both ends, same rule as for ref_level -->
77 <validator type="regex" message="Use only letters, digits and underscores">^[0-9a-zA-Z_]+$</validator>
78 </param>
79 <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/>
80 </repeat>
81 <!-- add an option for root -->
82 <!-- root mr -->
83 </inputs>
84 <outputs> <!-- the HAL file is collected in both modes; the GFA file only when Within-species mode is selected -->
85 <data name="out_hal" from_work_dir="alignment.full.hal" format="h5" label="${tool.name} on ${on_string} (HAL file)">
86 </data>
87 <data name="out_gfa" from_work_dir="alignment.gfa.gz" format="gfa2.gz" label="${tool.name} on ${on_string} (GFA file)">
88 <filter>aln_mode['aln_mode_select'] == 'intraspecies'</filter>
89 </data>
90 </outputs>
91 <tests>
92 <!-- Between-species (interspecies) mode: five sim*_chr6 genomes plus a guide tree; only the HAL output is expected and it is checked by size -->
93 <test expect_num_outputs="1">
94 <conditional name="aln_mode">
95 <param name="aln_mode_select" value="interspecies"/>
96 <param name="in_tree" value="test_tree.nhx"/>
97 </conditional>
98 <repeat name="in_seqs">
99 <param name="label" value="simCow_chr6"/>
100 <param name="fasta" value="simCow_chr6.fasta"/>
101 </repeat>
102 <repeat name="in_seqs">
103 <param name="label" value="simDog_chr6"/>
104 <param name="fasta" value="simDog_chr6.fasta"/>
105 </repeat>
106 <repeat name="in_seqs">
107 <param name="label" value="simHuman_chr6"/>
108 <param name="fasta" value="simHuman_chr6.fasta"/>
109 </repeat>
110 <repeat name="in_seqs">
111 <param name="label" value="simMouse_chr6"/>
112 <param name="fasta" value="simMouse_chr6.fasta"/>
113 </repeat>
114 <repeat name="in_seqs">
115 <param name="label" value="simRat_chr6"/>
116 <param name="fasta" value="simRat_chr6.fasta"/>
117 </repeat>
118 <output name="out_hal">
119 <assert_contents>
120 <has_size value="4472551" delta="200000"/>
121 </assert_contents>
122 </output>
123 </test>
124 <!-- Within-species (intraspecies) mode: the same five genomes with simCow_chr6 as the reference; both the HAL and the GFA outputs are expected and checked by size -->
125 <test expect_num_outputs="2">
126 <conditional name="aln_mode">
127 <param name="aln_mode_select" value="intraspecies"/>
128 <param name="ref_level" value="simCow_chr6"/>
129 </conditional>
130 <repeat name="in_seqs">
131 <param name="label" value="simCow_chr6"/>
132 <param name="fasta" value="simCow_chr6.fasta"/>
133 </repeat>
134 <repeat name="in_seqs">
135 <param name="label" value="simDog_chr6"/>
136 <param name="fasta" value="simDog_chr6.fasta"/>
137 </repeat>
138 <repeat name="in_seqs">
139 <param name="label" value="simHuman_chr6"/>
140 <param name="fasta" value="simHuman_chr6.fasta"/>
141 </repeat>
142 <repeat name="in_seqs">
143 <param name="label" value="simMouse_chr6"/>
144 <param name="fasta" value="simMouse_chr6.fasta"/>
145 </repeat>
146 <repeat name="in_seqs">
147 <param name="label" value="simRat_chr6"/>
148 <param name="fasta" value="simRat_chr6.fasta"/>
149 </repeat>
150 <output name="out_hal">
151 <assert_contents>
152 <has_size value="565214" delta="65214"/>
153 </assert_contents>
154 </output>
155 <output name="out_gfa">
156 <assert_contents>
157 <has_size value="173000" delta="200000"/>
158 </assert_contents>
159 </output>
160 </test>
161 <!-- Regression test: FASTA header with spaces (used to fail); two gzipped FASTA inputs run in within-species mode with badheader1 as the reference -->
162 <test expect_num_outputs="2">
163 <conditional name="aln_mode">
164 <param name="aln_mode_select" value="intraspecies"/>
165 <param name="ref_level" value="badheader1"/>
166 </conditional>
167 <repeat name="in_seqs">
168 <param name="label" value="badheader1"/>
169 <param name="fasta" value="bh1.fasta.gz"/>
170 </repeat>
171 <repeat name="in_seqs">
172 <param name="label" value="badheader2"/>
173 <param name="fasta" value="bh2.fasta.gz"/>
174 </repeat>
175 <output name="out_hal">
176 <assert_contents>
177 <has_size value="3382274" delta="200000"/>
178 </assert_contents>
179 </output>
180 <output name="out_gfa">
181 <assert_contents>
182 <has_size value="764748" delta="200000"/>
183 </assert_contents>
184 </output>
185 </test>
186 </tests>
187 <help><![CDATA[
188
189 .. class:: infomark
190
191 **What it does**
192
193 `Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a
194 reference-free whole-genome multiple alignment program. It can be used
195 to progressively align a large number of genomes.
196
197 -----
198
199 .. class:: infomark
200
201 **Usage**
202
203 **Between-species mode (Progressive Cactus)**
204
205 If you are aligning genomes from **multiple species**, you need to
206 provide a guide tree in Newick format. Cactus uses the guide tree to
207 progressively align genomes, meaning that it doesn’t need to align all
208 possible pairs of genomes.
209
210 A Newick-formatted tree for human, chimp, gorilla and orangutan genomes
211 looks like this:
212
213 ::
214
215 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831);
216
217 The numbers are the branch lengths.
218
219 **Within-species mode (Minigraph-Cactus)**
220
221 You can also run Cactus in `pangenome
222 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__
223 to align genomes of multiple individuals from the **same species**. In
224 this mode you will not use a guide tree. Cactus will use
225 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of
226 the input genomes and then use the graph to order the alignments. To use
227 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’
228 dropdown.
229
230 Unlike Between-species mode, Within-species mode depends on a predetermined reference genome. Enter the label of one of your input genomes in the ‘Reference genome’ field.
231
232 -----
233
234 .. class:: infomark
235
236 **Input**
237
238 The developers recommend soft-masking your genomes with RepeatMasker
239 before running Cactus. RepeatMasker is available on Galaxy.
240
241 If you’re using Between-species mode, you need to provide labels for the
242 fasta files that match the leaves on the guide tree. In the example
243 above, you would use the label ‘human’ for the human fasta file.
244
245 -----
246
247 .. class:: infomark
248
249 **Output**
250
251 The main output of Cactus is in `HAL
252 format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__.
253 You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to
254 convert the Cactus output to a VG or Multiple Alignment Format (MAF)
255 file.
256
257
258 ]]></help>
259 <expand macro="citations"/>
260 </tool>