comparison cactus_cactus.xml @ 4:3c8227556fdc draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 2b4c7c63b0e4a1f730794a4a5825bce29ee2eb25
author galaxy-australia
date Wed, 09 Nov 2022 03:14:17 +0000
parents 9422c5a87ee2
children 48c13389050d
comparison
equal deleted inserted replaced
3:9422c5a87ee2 4:3c8227556fdc
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="xrefs"/> 6 <expand macro="xrefs"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 export TMPDIR=\${_GALAXY_JOB_TMP_DIR} &&
10
9 ## Set up seqfile 11 ## Set up seqfile
10 12
11 #if $aln_mode.aln_mode_select == 'interspecies': 13 #if $aln_mode.aln_mode_select == 'interspecies':
12 cat $aln_mode.in_tree >> seqfile.txt && 14 cat $aln_mode.in_tree >> seqfile.txt &&
13 #end if 15 #end if
14 #set seq_line = ''
15 #for $seq in $in_seqs: 16 #for $seq in $in_seqs:
16 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext 17 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext
17 ln -s '$seq.fasta' '$seq_fn' && 18 ln -s '$seq.fasta' '$seq_fn' &&
18 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt 19 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt
19 #set seq_line += $seq_fn + ' '
20 && 20 &&
21 #end for 21 #end for
22 22
23 ## Run cactus 23 ## Run cactus
24 24
25 #if $aln_mode.aln_mode_select == 'intraspecies': 25 #if $aln_mode.aln_mode_select == 'intraspecies':
26 ## If we're doing a pangenome, we need to run the steps manually 26 ## If we're doing a pangenome, we need to run the steps manually
27 minigraph -xggs 27 cactus-minigraph
28 -t \${GALAXY_SLOTS:-4} 28 --binariesMode local
29 $seq_line 29 --mapCores \${GALAXY_SLOTS:-4}
30 > pangenome.gfa 30 --maxMemory \${GALAXY_MEMORY_MB:-8192}M
31 --reference $aln_mode.ref_level
32 --workDir ./
33 ./jobStore
34 ./seqfile.txt
35 pangenome.gfa
31 && 36 &&
32 cactus-graphmap 37 cactus-graphmap
33 --maxCores \${GALAXY_SLOTS:-4} 38 --binariesMode local
34 --maxMemory \${GALAXY_MEMORY_MB:-8192}M 39 --maxCores \${GALAXY_SLOTS:-4}
40 --maxMemory \${GALAXY_MEMORY_MB:-8192}M
41 --outputFasta pangenome.gfa.fa
42 --reference $aln_mode.ref_level
43 --workDir ./
35 ./jobStore 44 ./jobStore
36 ./seqfile.txt 45 ./seqfile.txt
37 pangenome.gfa 46 pangenome.gfa
38 pangenome.paf 47 pangenome.paf
39 --outputFasta pangenome.gfa.fa
40 --binariesMode local
41 --workDir ./
42 && 48 &&
43 cactus-align 49 cactus-align
44 --maxCores \${GALAXY_SLOTS:-4} 50 --binariesMode local
45 --maxMemory \${GALAXY_MEMORY_MB:-8192}M 51 --maxCores \${GALAXY_SLOTS:-4}
52 --maxMemory \${GALAXY_MEMORY_MB:-8192}M
53 --outVG
54 --pangenome
55 --reference $aln_mode.ref_level
56 --workDir ./
46 ./jobStore 57 ./jobStore
47 ./seqfile.txt 58 ./seqfile.txt
48 pangenome.paf 59 pangenome.paf
49 alignment.hal 60 alignment.hal
50 --pangenome 61 &&
51 --binariesMode local 62 cactus-graphmap-join
52 --workDir ./ 63 --binariesMode local
64 --gfaffix
65 --giraffe
66 --maxCores \${GALAXY_SLOTS:-4}
67 --maxMemory \${GALAXY_MEMORY_MB:-8192}M
68 --outDir ./
69 --outName alignment
70 --reference $aln_mode.ref_level
71 --vg alignment.vg
72 --wlineSep "."
73 ./jobStore
53 #else if $aln_mode.aln_mode_select == 'interspecies': 74 #else if $aln_mode.aln_mode_select == 'interspecies':
54 ## Run cactus normally 75 ## Run cactus normally
55 cactus 76 cactus
56 --maxCores \${GALAXY_SLOTS:-4} 77 --binariesMode local
57 --maxMemory \${GALAXY_MEMORY_MB:-8192}M 78 --maxCores \${GALAXY_SLOTS:-4}
79 --maxMemory \${GALAXY_MEMORY_MB:-8192}M
80 --workDir ./
58 jobStore seqfile.txt alignment.hal 81 jobStore seqfile.txt alignment.hal
59 --binariesMode local
60 --workDir ./
61 #end if 82 #end if
62 83
63 ]]></command> 84 ]]></command>
64 <inputs> 85 <inputs>
65 <conditional name="aln_mode"> 86 <conditional name="aln_mode">
69 </param> 90 </param>
70 <when value="interspecies"> 91 <when value="interspecies">
71 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" /> 92 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" />
72 </when> 93 </when>
73 <when value="intraspecies"> 94 <when value="intraspecies">
95 <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome Label'." />
74 </when> 96 </when>
75 </conditional> 97 </conditional>
76 <repeat name="in_seqs" title="Input genome"> 98 <repeat name="in_seqs" title="Input genome">
77 <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree."> 99 <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree.">
78 </param> 100 </param>
81 <!-- add an option for root --> 103 <!-- add an option for root -->
82 <!-- root mr --> 104 <!-- root mr -->
83 </inputs> 105 </inputs>
84 <outputs> 106 <outputs>
85 <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" /> 107 <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" />
108 <data name="out_gfa" format="gfa2.gz" from_work_dir="alignment.gfa.gz" label="${tool.name} on ${on_string} (GFA file)" >
109 <filter>aln_mode[aln_mode_select] == 'intraspecies'</filter>
110 </data>
86 </outputs> 111 </outputs>
87 <tests> 112 <tests>
88 <!-- test interspecies mode --> 113 <!-- test interspecies mode -->
89 <test expect_num_outputs="1"> 114 <test expect_num_outputs="2">
90 <conditional name="aln_mode"> 115 <conditional name="aln_mode">
91 <param name="aln_mode_select" value="interspecies"/> 116 <param name="aln_mode_select" value="interspecies"/>
92 <param name="in_tree" value="test_tree.nhx"/> 117 <param name="in_tree" value="test_tree.nhx"/>
93 </conditional> 118 </conditional>
94 <repeat name="in_seqs"> 119 <repeat name="in_seqs">
116 <has_size value="4783905" delta="200000" /> 141 <has_size value="4783905" delta="200000" />
117 </assert_contents> 142 </assert_contents>
118 </output> 143 </output>
119 </test> 144 </test>
120 <!-- within-species mode --> 145 <!-- within-species mode -->
121 <test expect_num_outputs="1"> 146 <test expect_num_outputs="2">
122 <conditional name="aln_mode"> 147 <conditional name="aln_mode">
123 <param name="aln_mode_select" value="intraspecies"/> 148 <param name="aln_mode_select" value="intraspecies"/>
149 <param name="ref_level" value="simCow_chr6"/>
124 </conditional> 150 </conditional>
125 <repeat name="in_seqs"> 151 <repeat name="in_seqs">
126 <param name="label" value="simCow_chr6"/> 152 <param name="label" value="simCow_chr6"/>
127 <param name="fasta" value="simCow_chr6.fasta"/> 153 <param name="fasta" value="simCow_chr6.fasta"/>
128 </repeat> 154 </repeat>
142 <param name="label" value="simRat_chr6"/> 168 <param name="label" value="simRat_chr6"/>
143 <param name="fasta" value="simRat_chr6.fasta"/> 169 <param name="fasta" value="simRat_chr6.fasta"/>
144 </repeat> 170 </repeat>
145 <output name="out_hal"> 171 <output name="out_hal">
146 <assert_contents> 172 <assert_contents>
147 <has_size value="1349620" delta="200000" /> 173 <has_size value="2088959" delta="200000" />
174 </assert_contents>
175 </output>
176 <output name="out_gfa">
177 <assert_contents>
178 <has_size value="173000" delta="200000" />
148 </assert_contents> 179 </assert_contents>
149 </output> 180 </output>
150 </test> 181 </test>
151 <!-- compressed input --> 182 <!-- compressed input -->
152 <test expect_num_outputs="1"> 183 <test expect_num_outputs="2">
153 <conditional name="aln_mode"> 184 <conditional name="aln_mode">
154 <param name="aln_mode_select" value="intraspecies"/> 185 <param name="aln_mode_select" value="intraspecies"/>
186 <param name="ref_level" value="germ_25"/>
155 </conditional> 187 </conditional>
156 <repeat name="in_seqs"> 188 <repeat name="in_seqs">
157 <param name="label" value="germ_25"/> 189 <param name="label" value="germ_25"/>
158 <param name="fasta" value="germ_25.fasta.gz"/> 190 <param name="fasta" value="germ_25.fasta.gz"/>
159 </repeat> 191 </repeat>
168 <output name="out_hal"> 200 <output name="out_hal">
169 <assert_contents> 201 <assert_contents>
170 <has_size value="7420424" delta="200000" /> 202 <has_size value="7420424" delta="200000" />
171 </assert_contents> 203 </assert_contents>
172 </output> 204 </output>
205 <output name="out_gfa">
206 <assert_contents>
207 <has_size value="6710429" delta="200000" />
208 </assert_contents>
209 </output>
173 </test> 210 </test>
174 <!-- FASTA header with spaces (used to fail) --> 211 <!-- FASTA header with spaces (used to fail) -->
175 <test expect_num_outputs="1"> 212 <test expect_num_outputs="2">
176 <conditional name="aln_mode"> 213 <conditional name="aln_mode">
177 <param name="aln_mode_select" value="intraspecies"/> 214 <param name="aln_mode_select" value="intraspecies"/>
215 <param name="ref_level" value="badheader1"/>
178 </conditional> 216 </conditional>
179 <repeat name="in_seqs"> 217 <repeat name="in_seqs">
180 <param name="label" value="badheader1"/> 218 <param name="label" value="badheader1"/>
181 <param name="fasta" value="bh1.fasta.gz"/> 219 <param name="fasta" value="bh1.fasta.gz"/>
182 </repeat> 220 </repeat>
185 <param name="fasta" value="bh2.fasta.gz"/> 223 <param name="fasta" value="bh2.fasta.gz"/>
186 </repeat> 224 </repeat>
187 <output name="out_hal"> 225 <output name="out_hal">
188 <assert_contents> 226 <assert_contents>
189 <has_size value="3382274" delta="200000" /> 227 <has_size value="3382274" delta="200000" />
228 </assert_contents>
229 </output>
230 <output name="out_gfa">
231 <assert_contents>
232 <has_size value="764748" delta="200000" />
190 </assert_contents> 233 </assert_contents>
191 </output> 234 </output>
192 </test> 235 </test>
193 </tests> 236 </tests>
194 <help><![CDATA[ 237 <help><![CDATA[
198 reference-free whole-genome multiple alignment program. It can be used 241 reference-free whole-genome multiple alignment program. It can be used
199 to progressively align a large number of genomes. 242 to progressively align a large number of genomes.
200 243
201 **Usage** 244 **Usage**
202 245
203 **Between-species mode** 246 **Between-species mode (Progressive Cactus)**
204 247
205 If you are aligning genomes from **multiple species**, you need to 248 If you are aligning genomes from **multiple species**, you need to
206 provide a guide tree in Newick format. Cactus uses the guide tree to 249 provide a guide tree in Newick format. Cactus uses the guide tree to
207 progressively align genomes, meaning that it doesn’t need to align all 250 progressively align genomes, meaning that it doesn’t need to align all
208 possible pairs of genomes. 251 possible pairs of genomes.
214 257
215 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831); 258 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831);
216 259
217 The numbers are the branch lengths. 260 The numbers are the branch lengths.
218 261
219 **Beta: Within-species mode** 262 **Within-species mode (Minigraph-Cactus)**
220 263
221 You can also run Cactus in `pangenome 264 You can also run Cactus in `pangenome
222 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ 265 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__
223 to align genomes of multiple individuals from the **same species**. In 266 to align genomes of multiple individuals from the **same species**. In
224 this mode you will not use a guide tree. Cactus will use 267 this mode you will not use a guide tree. Cactus will use
225 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of 268 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of
226 the input genomes and then use the graph to order the alignments. To use 269 the input genomes and then use the graph to order the alignments. To use
227 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ 270 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’
228 dropdown. 271 dropdown.
229 272
273 Unlike Between-species mode, Within-species mode depends on a predetermined reference genome, which must be one of the input genomes and must match the label given in 'Genome Label'.
274
230 **Input** 275 **Input**
231 276
232 The developers recommend soft-masking your genomes with RepeatMasker 277 The developers recommend soft-masking your genomes with RepeatMasker
233 before running Cactus. RepeatMasker is available on Galaxy. 278 before running Cactus. RepeatMasker is available on Galaxy.
234 279