Mercurial > repos > galaxy-australia > cactus_cactus
comparison cactus_cactus.xml @ 0:85f68b344286 draft
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8f8363625623f2ff3f04d12d227673ac134eba24"
author | galaxy-australia |
---|---|
date | Mon, 04 Apr 2022 06:27:44 +0000 |
parents | |
children | 1bc1199f0ff4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:85f68b344286 |
---|---|
1 <tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
2 <description>whole-genome multiple sequence alignment.</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="xrefs"/> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 ## Pangenome (intraspecies) mode only: abort with exit code 1 if any | |
10 ## FASTA header line (starting with ">") contains whitespace | |
11 #if $aln_mode.aln_mode_select == 'intraspecies': | |
12 #for $seq in $in_seqs: | |
13 if | |
14 #if $seq.fasta.is_of_type('fasta.gz'): | |
15 zgrep | |
16 #else | |
17 grep | |
18 #end if | |
19 "^>" '$seq.fasta' | grep -q "[[:space:]]" ; then | |
20 echo "Error parsing input FASTA." >&2 ; | |
21 echo "Pangenome mode fails if there are spaces in the header." >&2 ; | |
22 echo "Please remove them with the NormalizeFasta tool." >&2 ; | |
23 exit 1 | |
24 ; fi && | |
25 #end for | |
26 #end if | |
27 | |
28 ## Build seqfile.txt: the guide tree first (interspecies only), then one "label path" line per genome | |
29 | |
30 #if $aln_mode.aln_mode_select == 'interspecies': | |
31 cat '$aln_mode.in_tree' >> seqfile.txt && | |
32 #end if | |
33 #set seq_line = '' | |
34 #for $seq in $in_seqs: | |
35 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext | |
36 ln -s '$seq.fasta' '$seq_fn' && | |
37 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt | |
38 #set seq_line += $seq_fn + ' ' | |
39 && | |
40 #end for | |
41 | |
42 ## Run the aligner; both branches write alignment.hal | |
43 | |
44 #if $aln_mode.aln_mode_select == 'intraspecies': | |
45 ## Pangenome: run minigraph, cactus-graphmap and cactus-align as separate steps | |
46 minigraph -xggs | |
47 -t \${GALAXY_SLOTS:-4} | |
48 $seq_line | |
49 > pangenome.gfa | |
50 && | |
51 cactus-graphmap | |
52 --maxCores \${GALAXY_SLOTS:-4} | |
53 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
54 ./jobStore | |
55 ./seqfile.txt | |
56 pangenome.gfa | |
57 pangenome.paf | |
58 --outputFasta pangenome.gfa.fa | |
59 --binariesMode local | |
60 --workDir ./ | |
61 && | |
62 cactus-align | |
63 --maxCores \${GALAXY_SLOTS:-4} | |
64 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
65 ./jobStore | |
66 ./seqfile.txt | |
67 pangenome.paf | |
68 alignment.hal | |
69 --pangenome | |
70 --pafInput | |
71 --binariesMode local | |
72 --workDir ./ | |
73 #else if $aln_mode.aln_mode_select == 'interspecies': | |
74 ## Between-species: a single cactus call driven by the guide tree in seqfile.txt | |
75 cactus | |
76 --maxCores \${GALAXY_SLOTS:-4} | |
77 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
78 jobStore seqfile.txt alignment.hal | |
79 --binariesMode local | |
80 --workDir ./ | |
81 #end if | |
82 | |
83 ]]></command> | |
84 <inputs> <!-- Alignment mode (with a guide tree for between-species runs) plus a repeatable list of labelled genome FASTA files --> | |
85 <conditional name="aln_mode"> | |
86 <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'"> | |
87 <option value="interspecies" selected="true">Between-species</option> | |
88 <option value="intraspecies">Within-species</option> | |
89 </param> | |
90 <when value="interspecies"> | |
91 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" /> | |
92 </when> | |
93 <when value="intraspecies"> | |
94 </when> | |
95 </conditional> | |
96 <repeat name="in_seqs" title="Input genome"> | |
97 <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree."> | |
98 <validator type="regex" message="Genome labels must be non-empty and must not contain whitespace.">^\S+$</validator></param> <!-- the label becomes a file name and a whitespace-separated seqfile.txt field in the command --> | |
99 <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/> | |
100 </repeat> | |
101 <!-- add an option for root --> | |
102 <!-- root mr --> | |
103 </inputs> | |
104 <outputs> <!-- Single output: the alignment.hal file that the command writes in the working directory --> | |
105 <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" /> | |
106 </outputs> | |
107 <tests> | |
108 <!-- Between-species (interspecies) mode: five labelled chr6 FASTA inputs plus a guide tree; asserts the HAL output size within a tolerance --> | |
109 <test expect_num_outputs="1"> | |
110 <conditional name="aln_mode"> | |
111 <param name="aln_mode_select" value="interspecies"/> | |
112 <param name="in_tree" value="test_tree.nhx"/> | |
113 </conditional> | |
114 <repeat name="in_seqs"> | |
115 <param name="label" value="simCow_chr6"/> | |
116 <param name="fasta" value="simCow_chr6.fasta"/> | |
117 </repeat> | |
118 <repeat name="in_seqs"> | |
119 <param name="label" value="simDog_chr6"/> | |
120 <param name="fasta" value="simDog_chr6.fasta"/> | |
121 </repeat> | |
122 <repeat name="in_seqs"> | |
123 <param name="label" value="simHuman_chr6"/> | |
124 <param name="fasta" value="simHuman_chr6.fasta"/> | |
125 </repeat> | |
126 <repeat name="in_seqs"> | |
127 <param name="label" value="simMouse_chr6"/> | |
128 <param name="fasta" value="simMouse_chr6.fasta"/> | |
129 </repeat> | |
130 <repeat name="in_seqs"> | |
131 <param name="label" value="simRat_chr6"/> | |
132 <param name="fasta" value="simRat_chr6.fasta"/> | |
133 </repeat> | |
134 <output name="out_hal"> | |
135 <assert_contents> | |
136 <has_size value="5272354" delta="200000" /> | |
137 </assert_contents> | |
138 </output> | |
139 </test> | |
140 <!-- Within-species (intraspecies) mode: the same five labelled chr6 FASTA inputs, no guide tree; asserts the HAL output size within a tolerance --> | |
141 <test expect_num_outputs="1"> | |
142 <conditional name="aln_mode"> | |
143 <param name="aln_mode_select" value="intraspecies"/> | |
144 </conditional> | |
145 <repeat name="in_seqs"> | |
146 <param name="label" value="simCow_chr6"/> | |
147 <param name="fasta" value="simCow_chr6.fasta"/> | |
148 </repeat> | |
149 <repeat name="in_seqs"> | |
150 <param name="label" value="simDog_chr6"/> | |
151 <param name="fasta" value="simDog_chr6.fasta"/> | |
152 </repeat> | |
153 <repeat name="in_seqs"> | |
154 <param name="label" value="simHuman_chr6"/> | |
155 <param name="fasta" value="simHuman_chr6.fasta"/> | |
156 </repeat> | |
157 <repeat name="in_seqs"> | |
158 <param name="label" value="simMouse_chr6"/> | |
159 <param name="fasta" value="simMouse_chr6.fasta"/> | |
160 </repeat> | |
161 <repeat name="in_seqs"> | |
162 <param name="label" value="simRat_chr6"/> | |
163 <param name="fasta" value="simRat_chr6.fasta"/> | |
164 </repeat> | |
165 <output name="out_hal"> | |
166 <assert_contents> | |
167 <has_size value="2119332" delta="200000" /> | |
168 </assert_contents> | |
169 </output> | |
170 </test> | |
171 <!-- Within-species mode with three .fasta.gz inputs; asserts the HAL output size within a tolerance --> | |
172 <test expect_num_outputs="1"> | |
173 <conditional name="aln_mode"> | |
174 <param name="aln_mode_select" value="intraspecies"/> | |
175 </conditional> | |
176 <repeat name="in_seqs"> | |
177 <param name="label" value="germ_25"/> | |
178 <param name="fasta" value="germ_25.fasta.gz"/> | |
179 </repeat> | |
180 <repeat name="in_seqs"> | |
181 <param name="label" value="vulg_25"/> | |
182 <param name="fasta" value="vulg_25.fasta.gz"/> | |
183 </repeat> | |
184 <repeat name="in_seqs"> | |
185 <param name="label" value="pens_25"/> | |
186 <param name="fasta" value="pens_25.fasta.gz"/> | |
187 </repeat> | |
188 <output name="out_hal"> | |
189 <assert_contents> | |
190 <has_size value="7204260" delta="200000" /> | |
191 </assert_contents> | |
192 </output> | |
193 </test> | |
194 <!-- FASTA header check: within-species run expected to fail with exit code 1; the bh*.fasta.gz fixtures presumably have whitespace in their headers (confirm against test-data) --> | |
195 <test expect_exit_code="1" expect_failure="true"> | |
196 <conditional name="aln_mode"> | |
197 <param name="aln_mode_select" value="intraspecies"/> | |
198 </conditional> | |
199 <repeat name="in_seqs"> | |
200 <param name="label" value="badheader1"/> | |
201 <param name="fasta" value="bh1.fasta.gz"/> | |
202 </repeat> | |
203 <repeat name="in_seqs"> | |
204 <param name="label" value="badheader2"/> | |
205 <param name="fasta" value="bh2.fasta.gz"/> | |
206 </repeat> | |
207 </test> | |
208 </tests> | |
209 <help><![CDATA[ | |
210 **What it does** | |
211 | |
212 `Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a | |
213 reference-free whole-genome multiple alignment program. It can be used | |
214 to progressively align a large number of genomes. | |
215 | |
216 **Usage** | |
217 | |
218 **Between-species mode** | |
219 | |
220 If you are aligning genomes from **multiple species**, you need to | |
221 provide a guide tree in Newick format. Cactus uses the guide tree to | |
222 progressively align genomes, meaning that it doesn’t need to align all | |
223 possible pairs of genomes. | |
224 | |
225 A Newick-formatted tree for human, chimp, gorilla and orangutan genomes | |
226 looks like this: | |
227 | |
228 :: | |
229 | |
230 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831); | |
231 | |
232 The numbers are the branch lengths. | |
233 | |
234 **Beta: Within-species mode** | |
235 | |
236 You can also run Cactus in `pangenome | |
237 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ | |
238 to align genomes of multiple individuals from the **same species**. In | |
239 this mode you will not use a guide tree. Cactus will use | |
240 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of | |
241 the input genomes and then use the graph to order the alignments. To use | |
242 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ | |
243 dropdown. | |
244 | |
245 ⚠️ To use pangenome mode, you will have to remove all whitespace from the sequence headers in your FASTA files. | |
246 You can do this with the NormalizeFasta tool. | |
247 | |
248 **Input** | |
249 | |
250 The developers recommend soft-masking your genomes with RepeatMasker | |
251 before running Cactus. RepeatMasker is available on Galaxy. | |
252 | |
253 If you’re using Between-species mode, you need to provide labels for the | |
254 fasta files that match the leaves on the guide tree. In the example | |
255 above, you would use the label ‘human’ for the human fasta file. | |
256 | |
257 **Output** | |
258 | |
259 The main output of Cactus is in `HAL | |
260 format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__. | |
261 You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to | |
262 convert the Cactus output to a VG or Multiple Alignment Format (MAF) | |
263 file. | |
264 | |
265 | |
266 ]]></help> | |
267 <expand macro="citations"/> | |
268 </tool> |