Mercurial > repos > galaxy-australia > cactus_cactus
comparison cactus_cactus.xml @ 4:3c8227556fdc draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 2b4c7c63b0e4a1f730794a4a5825bce29ee2eb25
author | galaxy-australia |
---|---|
date | Wed, 09 Nov 2022 03:14:17 +0000 |
parents | 9422c5a87ee2 |
children | 48c13389050d |
comparison
equal
deleted
inserted
replaced
3:9422c5a87ee2 | 4:3c8227556fdc |
---|---|
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="xrefs"/> | 6 <expand macro="xrefs"/> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
8 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
9 export TMPDIR=\${_GALAXY_JOB_TMP_DIR} && | |
10 | |
9 ## Set up seqfile | 11 ## Set up seqfile |
10 | 12 |
11 #if $aln_mode.aln_mode_select == 'interspecies': | 13 #if $aln_mode.aln_mode_select == 'interspecies': |
12 cat $aln_mode.in_tree >> seqfile.txt && | 14 cat $aln_mode.in_tree >> seqfile.txt && |
13 #end if | 15 #end if |
14 #set seq_line = '' | |
15 #for $seq in $in_seqs: | 16 #for $seq in $in_seqs: |
16 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext | 17 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext |
17 ln -s '$seq.fasta' '$seq_fn' && | 18 ln -s '$seq.fasta' '$seq_fn' && |
18 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt | 19 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt |
19 #set seq_line += $seq_fn + ' ' | |
20 && | 20 && |
21 #end for | 21 #end for |
22 | 22 |
23 ## Run cactus | 23 ## Run cactus |
24 | 24 |
25 #if $aln_mode.aln_mode_select == 'intraspecies': | 25 #if $aln_mode.aln_mode_select == 'intraspecies': |
26 ## If we're doing a pangenome, we need to run the steps manually | 26 ## If we're doing a pangenome, we need to run the steps manually |
27 minigraph -xggs | 27 cactus-minigraph |
28 -t \${GALAXY_SLOTS:-4} | 28 --binariesMode local |
29 $seq_line | 29 --mapCores \${GALAXY_SLOTS:-4} |
30 > pangenome.gfa | 30 --maxMemory \${GALAXY_MEMORY_MB:-8192}M |
31 --reference $aln_mode.ref_level | |
32 --workDir ./ | |
33 ./jobStore | |
34 ./seqfile.txt | |
35 pangenome.gfa | |
31 && | 36 && |
32 cactus-graphmap | 37 cactus-graphmap |
33 --maxCores \${GALAXY_SLOTS:-4} | 38 --binariesMode local |
34 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | 39 --maxCores \${GALAXY_SLOTS:-4} |
40 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
41 --outputFasta pangenome.gfa.fa | |
42 --reference $aln_mode.ref_level | |
43 --workDir ./ | |
35 ./jobStore | 44 ./jobStore |
36 ./seqfile.txt | 45 ./seqfile.txt |
37 pangenome.gfa | 46 pangenome.gfa |
38 pangenome.paf | 47 pangenome.paf |
39 --outputFasta pangenome.gfa.fa | |
40 --binariesMode local | |
41 --workDir ./ | |
42 && | 48 && |
43 cactus-align | 49 cactus-align |
44 --maxCores \${GALAXY_SLOTS:-4} | 50 --binariesMode local |
45 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | 51 --maxCores \${GALAXY_SLOTS:-4} |
52 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
53 --outVG | |
54 --pangenome | |
55 --reference $aln_mode.ref_level | |
56 --workDir ./ | |
46 ./jobStore | 57 ./jobStore |
47 ./seqfile.txt | 58 ./seqfile.txt |
48 pangenome.paf | 59 pangenome.paf |
49 alignment.hal | 60 alignment.hal |
50 --pangenome | 61 && |
51 --binariesMode local | 62 cactus-graphmap-join |
52 --workDir ./ | 63 --binariesMode local |
64 --gfaffix | |
65 --giraffe | |
66 --maxCores \${GALAXY_SLOTS:-4} | |
67 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
68 --outDir ./ | |
69 --outName alignment | |
70 --reference $aln_mode.ref_level | |
71 --vg alignment.vg | |
72 --wlineSep "." | |
73 ./jobStore | |
53 #else if $aln_mode.aln_mode_select == 'interspecies': | 74 #else if $aln_mode.aln_mode_select == 'interspecies': |
54 ## Run cactus normally | 75 ## Run cactus normally |
55 cactus | 76 cactus |
56 --maxCores \${GALAXY_SLOTS:-4} | 77 --binariesMode local |
57 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | 78 --maxCores \${GALAXY_SLOTS:-4} |
79 --maxMemory \${GALAXY_MEMORY_MB:-8192}M | |
80 --workDir ./ | |
58 jobStore seqfile.txt alignment.hal | 81 jobStore seqfile.txt alignment.hal |
59 --binariesMode local | |
60 --workDir ./ | |
61 #end if | 82 #end if |
62 | 83 |
63 ]]></command> | 84 ]]></command> |
64 <inputs> | 85 <inputs> |
65 <conditional name="aln_mode"> | 86 <conditional name="aln_mode"> |
69 </param> | 90 </param> |
70 <when value="interspecies"> | 91 <when value="interspecies"> |
71 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" /> | 92 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" /> |
72 </when> | 93 </when> |
73 <when value="intraspecies"> | 94 <when value="intraspecies"> |
95 <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome Label'." /> | |
74 </when> | 96 </when> |
75 </conditional> | 97 </conditional> |
76 <repeat name="in_seqs" title="Input genome"> | 98 <repeat name="in_seqs" title="Input genome"> |
77 <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree."> | 99 <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree."> |
78 </param> | 100 </param> |
81 <!-- add an option for root --> | 103 <!-- add an option for root --> |
82 <!-- root mr --> | 104 <!-- root mr --> |
83 </inputs> | 105 </inputs> |
84 <outputs> | 106 <outputs> |
85 <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" /> | 107 <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" /> |
108 <data name="out_gfa" format="gfa2.gz" from_work_dir="alignment.gfa.gz" label="${tool.name} on ${on_string} (GFA file)" > | |
109 <filter>aln_mode[aln_mode_select] == 'intraspecies'</filter> | |
110 </data> | |
86 </outputs> | 111 </outputs> |
87 <tests> | 112 <tests> |
88 <!-- test interspecies mode --> | 113 <!-- test interspecies mode --> |
89 <test expect_num_outputs="1"> | 114 <test expect_num_outputs="2"> |
90 <conditional name="aln_mode"> | 115 <conditional name="aln_mode"> |
91 <param name="aln_mode_select" value="interspecies"/> | 116 <param name="aln_mode_select" value="interspecies"/> |
92 <param name="in_tree" value="test_tree.nhx"/> | 117 <param name="in_tree" value="test_tree.nhx"/> |
93 </conditional> | 118 </conditional> |
94 <repeat name="in_seqs"> | 119 <repeat name="in_seqs"> |
116 <has_size value="4783905" delta="200000" /> | 141 <has_size value="4783905" delta="200000" /> |
117 </assert_contents> | 142 </assert_contents> |
118 </output> | 143 </output> |
119 </test> | 144 </test> |
120 <!-- within-species mode --> | 145 <!-- within-species mode --> |
121 <test expect_num_outputs="1"> | 146 <test expect_num_outputs="2"> |
122 <conditional name="aln_mode"> | 147 <conditional name="aln_mode"> |
123 <param name="aln_mode_select" value="intraspecies"/> | 148 <param name="aln_mode_select" value="intraspecies"/> |
149 <param name="ref_level" value="simCow_chr6"/> | |
124 </conditional> | 150 </conditional> |
125 <repeat name="in_seqs"> | 151 <repeat name="in_seqs"> |
126 <param name="label" value="simCow_chr6"/> | 152 <param name="label" value="simCow_chr6"/> |
127 <param name="fasta" value="simCow_chr6.fasta"/> | 153 <param name="fasta" value="simCow_chr6.fasta"/> |
128 </repeat> | 154 </repeat> |
142 <param name="label" value="simRat_chr6"/> | 168 <param name="label" value="simRat_chr6"/> |
143 <param name="fasta" value="simRat_chr6.fasta"/> | 169 <param name="fasta" value="simRat_chr6.fasta"/> |
144 </repeat> | 170 </repeat> |
145 <output name="out_hal"> | 171 <output name="out_hal"> |
146 <assert_contents> | 172 <assert_contents> |
147 <has_size value="1349620" delta="200000" /> | 173 <has_size value="2088959" delta="200000" /> |
174 </assert_contents> | |
175 </output> | |
176 <output name="out_gfa"> | |
177 <assert_contents> | |
178 <has_size value="173000" delta="200000" /> | |
148 </assert_contents> | 179 </assert_contents> |
149 </output> | 180 </output> |
150 </test> | 181 </test> |
151 <!-- compressed input --> | 182 <!-- compressed input --> |
152 <test expect_num_outputs="1"> | 183 <test expect_num_outputs="2"> |
153 <conditional name="aln_mode"> | 184 <conditional name="aln_mode"> |
154 <param name="aln_mode_select" value="intraspecies"/> | 185 <param name="aln_mode_select" value="intraspecies"/> |
186 <param name="ref_level" value="germ_25"/> | |
155 </conditional> | 187 </conditional> |
156 <repeat name="in_seqs"> | 188 <repeat name="in_seqs"> |
157 <param name="label" value="germ_25"/> | 189 <param name="label" value="germ_25"/> |
158 <param name="fasta" value="germ_25.fasta.gz"/> | 190 <param name="fasta" value="germ_25.fasta.gz"/> |
159 </repeat> | 191 </repeat> |
168 <output name="out_hal"> | 200 <output name="out_hal"> |
169 <assert_contents> | 201 <assert_contents> |
170 <has_size value="7420424" delta="200000" /> | 202 <has_size value="7420424" delta="200000" /> |
171 </assert_contents> | 203 </assert_contents> |
172 </output> | 204 </output> |
205 <output name="out_gfa"> | |
206 <assert_contents> | |
207 <has_size value="6710429" delta="200000" /> | |
208 </assert_contents> | |
209 </output> | |
173 </test> | 210 </test> |
174 <!-- FASTA header with spaces (used to fail) --> | 211 <!-- FASTA header with spaces (used to fail) --> |
175 <test expect_num_outputs="1"> | 212 <test expect_num_outputs="2"> |
176 <conditional name="aln_mode"> | 213 <conditional name="aln_mode"> |
177 <param name="aln_mode_select" value="intraspecies"/> | 214 <param name="aln_mode_select" value="intraspecies"/> |
215 <param name="ref_level" value="badheader1"/> | |
178 </conditional> | 216 </conditional> |
179 <repeat name="in_seqs"> | 217 <repeat name="in_seqs"> |
180 <param name="label" value="badheader1"/> | 218 <param name="label" value="badheader1"/> |
181 <param name="fasta" value="bh1.fasta.gz"/> | 219 <param name="fasta" value="bh1.fasta.gz"/> |
182 </repeat> | 220 </repeat> |
185 <param name="fasta" value="bh2.fasta.gz"/> | 223 <param name="fasta" value="bh2.fasta.gz"/> |
186 </repeat> | 224 </repeat> |
187 <output name="out_hal"> | 225 <output name="out_hal"> |
188 <assert_contents> | 226 <assert_contents> |
189 <has_size value="3382274" delta="200000" /> | 227 <has_size value="3382274" delta="200000" /> |
228 </assert_contents> | |
229 </output> | |
230 <output name="out_gfa"> | |
231 <assert_contents> | |
232 <has_size value="764748" delta="200000" /> | |
190 </assert_contents> | 233 </assert_contents> |
191 </output> | 234 </output> |
192 </test> | 235 </test> |
193 </tests> | 236 </tests> |
194 <help><![CDATA[ | 237 <help><![CDATA[ |
198 reference-free whole-genome multiple alignment program. It can be used | 241 reference-free whole-genome multiple alignment program. It can be used |
199 to progressively align a large number of genomes. | 242 to progressively align a large number of genomes. |
200 | 243 |
201 **Usage** | 244 **Usage** |
202 | 245 |
203 **Between-species mode** | 246 **Between-species mode (Progressive Cactus)** |
204 | 247 |
205 If you are aligning genomes from **multiple species**, you need to | 248 If you are aligning genomes from **multiple species**, you need to |
206 provide a guide tree in Newick format. Cactus uses the guide tree to | 249 provide a guide tree in Newick format. Cactus uses the guide tree to |
207 progressively align genomes, meaning that it doesn’t need to align all | 250 progressively align genomes, meaning that it doesn’t need to align all |
208 possible pairs of genomes. | 251 possible pairs of genomes. |
214 | 257 |
215 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831); | 258 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831); |
216 | 259 |
217 The numbers are the branch lengths. | 260 The numbers are the branch lengths. |
218 | 261 |
219 **Beta: Within-species mode** | 262 **Within-species mode (Minigraph-Cactus)** |
220 | 263 |
221 You can also run Cactus in `pangenome | 264 You can also run Cactus in `pangenome |
222 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ | 265 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ |
223 to align genomes of multiple individuals from the **same species**. In | 266 to align genomes of multiple individuals from the **same species**. In |
224 this mode you will not use a guide tree. Cactus will use | 267 this mode you will not use a guide tree. Cactus will use |
225 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of | 268 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of |
226 the input genomes and then use the graph to order the alignments. To use | 269 the input genomes and then use the graph to order the alignments. To use |
227 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ | 270 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ |
228 dropdown. | 271 dropdown. |
229 | 272 |
273 Unlike Between-species mode, Within-species mode depends on a predetermined reference genome, which must be one of the input genomes. | |
274 | |
230 **Input** | 275 **Input** |
231 | 276 |
232 The developers recommend soft-masking your genomes with RepeatMasker | 277 The developers recommend soft-masking your genomes with RepeatMasker |
233 before running Cactus. RepeatMasker is available on Galaxy. | 278 before running Cactus. RepeatMasker is available on Galaxy. |
234 | 279 |