Mercurial > repos > iuc > cactus_cactus
comparison cactus_cactus.xml @ 0:51c3c42bc644 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cactus commit 827619d22d2931d8fb34ed6844cfa91433e1ac2c
| author | iuc |
|---|---|
| date | Tue, 06 Feb 2024 00:30:39 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:51c3c42bc644 |
|---|---|
| 1 <tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description>whole-genome multiple sequence alignment</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="xrefs"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 | |
| 10 ## Set up seqfile: the guide tree (between-species mode only) followed by one 'label path' line per input genome | |
| 11 ## Every interpolated dataset path and user-supplied value is single-quoted so the shell receives it as one word | |
| 12 #if $aln_mode.aln_mode_select == 'interspecies': | |
| 13 cat '$aln_mode.in_tree' >> seqfile.txt && | |
| 14 #end if | |
| 15 #for $seq in $in_seqs: | |
| 16 #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext | |
| 17 ln -s '$seq.fasta' '$seq_fn' && | |
| 18 printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt | |
| 19 && | |
| 20 #end for | |
| 21 | |
| 22 ## Run cactus | |
| 23 | |
| 24 #if $aln_mode.aln_mode_select == 'intraspecies': | |
| 25 ## Run cactus-pangenome | |
| 26 ## --reference should be the first argument | |
| 27 ## https://github.com/ComparativeGenomicsToolkit/cactus/issues/1093#issuecomment-1620088688 | |
| 28 cactus-pangenome | |
| 29 --reference '$aln_mode.ref_level' | |
| 30 --binariesMode local | |
| 31 --maxCores \${GALAXY_SLOTS:-4} | |
| 32 --maxMemory \${GALAXY_MEMORY_MB:-16384}M | |
| 33 --outDir ./ | |
| 34 --outName alignment | |
| 35 jobStore | |
| 36 seqfile.txt | |
| 37 #else if $aln_mode.aln_mode_select == 'interspecies': | |
| 38 ## Run cactus normally | |
| 39 cactus | |
| 40 --binariesMode local | |
| 41 --maxCores \${GALAXY_SLOTS:-4} | |
| 42 --maxMemory \${GALAXY_MEMORY_MB:-16384}M | |
| 43 --workDir ./ | |
| 44 jobStore | |
| 45 seqfile.txt | |
| 46 alignment.full.hal | |
| 47 #end if | |
| 48 | |
| 49 ]]></command> | |
| 50 <inputs> | |
| 51 <conditional name="aln_mode"> | |
| 52 <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'"> | |
| 53 <option value="interspecies" selected="true">Between-species</option> | |
| 54 <option value="intraspecies">Within-species</option> | |
| 55 </param> | |
| 56 <when value="interspecies"> | |
| 57 <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes"/> | |
| 58 </when> | |
| 59 <when value="intraspecies"> | |
| 60 <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome Label'."> | |
| 61 <sanitizer invalid_char=""> | |
| 62 <valid initial="string.letters,string.digits"> | |
| 63 <add value="_"/> | |
| 64 </valid> | |
| 65 </sanitizer> | |
| 66 <validator type="regex">^[0-9a-zA-Z_]+$</validator> <!-- anchored at both ends so the whole value must be valid, not just its first characters --> | |
| 67 </param> | |
| 68 </when> | |
| 69 </conditional> | |
| 70 <repeat name="in_seqs" title="Input genome"> | |
| 71 <param name="label" type="text" value="" label="Genome label" help="Letters, digits and underscores only (NO SPACES). In Between-species mode this must match a leaf label in the guide tree."> | |
| 72 <sanitizer invalid_char=""> | |
| 73 <valid initial="string.letters,string.digits"> | |
| 74 <add value="_"/> | |
| 75 </valid> | |
| 76 </sanitizer> | |
| 77 <validator type="regex">^[0-9a-zA-Z_]+$</validator> <!-- anchored at both ends so a label with spaces is rejected instead of being silently stripped by the sanitizer --> | |
| 78 </param> | |
| 79 <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/> | |
| 80 </repeat> | |
| 81 <!-- add an option for root --> | |
| 82 <!-- root mr --> | |
| 83 </inputs> | |
| 84 <outputs> | |
| 85 <data name="out_hal" format="h5" from_work_dir="alignment.full.hal" label="${tool.name} on ${on_string} (HAL file)"> <!-- HAL alignment; has no filter, so it is collected in both alignment modes --> | |
| 86 </data> | |
| 87 <data name="out_gfa" format="gfa2.gz" from_work_dir="alignment.gfa.gz" label="${tool.name} on ${on_string} (GFA file)"> <!-- GFA file; only collected for Within-species runs (see the filter below) --> | |
| 88 <filter>aln_mode['aln_mode_select'] == 'intraspecies'</filter> | |
| 89 </data> | |
| 90 </outputs> | |
| 91 <tests> | |
| 92 <!-- Between-species (interspecies) mode: five labelled genomes plus a guide tree; only the HAL output is expected and it is checked by size --> | |
| 93 <test expect_num_outputs="1"> | |
| 94 <conditional name="aln_mode"> | |
| 95 <param name="aln_mode_select" value="interspecies"/> | |
| 96 <param name="in_tree" value="test_tree.nhx"/> | |
| 97 </conditional> | |
| 98 <repeat name="in_seqs"> | |
| 99 <param name="label" value="simCow_chr6"/> | |
| 100 <param name="fasta" value="simCow_chr6.fasta"/> | |
| 101 </repeat> | |
| 102 <repeat name="in_seqs"> | |
| 103 <param name="label" value="simDog_chr6"/> | |
| 104 <param name="fasta" value="simDog_chr6.fasta"/> | |
| 105 </repeat> | |
| 106 <repeat name="in_seqs"> | |
| 107 <param name="label" value="simHuman_chr6"/> | |
| 108 <param name="fasta" value="simHuman_chr6.fasta"/> | |
| 109 </repeat> | |
| 110 <repeat name="in_seqs"> | |
| 111 <param name="label" value="simMouse_chr6"/> | |
| 112 <param name="fasta" value="simMouse_chr6.fasta"/> | |
| 113 </repeat> | |
| 114 <repeat name="in_seqs"> | |
| 115 <param name="label" value="simRat_chr6"/> | |
| 116 <param name="fasta" value="simRat_chr6.fasta"/> | |
| 117 </repeat> | |
| 118 <output name="out_hal"> | |
| 119 <assert_contents> | |
| 120 <has_size value="4472551" delta="200000"/> | |
| 121 </assert_contents> | |
| 122 </output> | |
| 123 </test> | |
| 124 <!-- Within-species (intraspecies) mode: same five genomes, no guide tree, simCow_chr6 as the reference; both the HAL and GFA outputs are expected --> | |
| 125 <test expect_num_outputs="2"> | |
| 126 <conditional name="aln_mode"> | |
| 127 <param name="aln_mode_select" value="intraspecies"/> | |
| 128 <param name="ref_level" value="simCow_chr6"/> | |
| 129 </conditional> | |
| 130 <repeat name="in_seqs"> | |
| 131 <param name="label" value="simCow_chr6"/> | |
| 132 <param name="fasta" value="simCow_chr6.fasta"/> | |
| 133 </repeat> | |
| 134 <repeat name="in_seqs"> | |
| 135 <param name="label" value="simDog_chr6"/> | |
| 136 <param name="fasta" value="simDog_chr6.fasta"/> | |
| 137 </repeat> | |
| 138 <repeat name="in_seqs"> | |
| 139 <param name="label" value="simHuman_chr6"/> | |
| 140 <param name="fasta" value="simHuman_chr6.fasta"/> | |
| 141 </repeat> | |
| 142 <repeat name="in_seqs"> | |
| 143 <param name="label" value="simMouse_chr6"/> | |
| 144 <param name="fasta" value="simMouse_chr6.fasta"/> | |
| 145 </repeat> | |
| 146 <repeat name="in_seqs"> | |
| 147 <param name="label" value="simRat_chr6"/> | |
| 148 <param name="fasta" value="simRat_chr6.fasta"/> | |
| 149 </repeat> | |
| 150 <output name="out_hal"> | |
| 151 <assert_contents> | |
| 152 <has_size value="565214" delta="65214"/> | |
| 153 </assert_contents> | |
| 154 </output> | |
| 155 <output name="out_gfa"> | |
| 156 <assert_contents> | |
| 157 <has_size value="173000" delta="200000"/> <!-- NOTE(review): delta is larger than value, so an empty GFA file would also pass; consider tightening --> | |
| 158 </assert_contents> | |
| 159 </output> | |
| 160 </test> | |
| 161 <!-- Regression test: within-species run on two gzipped FASTA inputs whose headers contain spaces (used to fail) --> | |
| 162 <test expect_num_outputs="2"> | |
| 163 <conditional name="aln_mode"> | |
| 164 <param name="aln_mode_select" value="intraspecies"/> | |
| 165 <param name="ref_level" value="badheader1"/> | |
| 166 </conditional> | |
| 167 <repeat name="in_seqs"> | |
| 168 <param name="label" value="badheader1"/> | |
| 169 <param name="fasta" value="bh1.fasta.gz"/> | |
| 170 </repeat> | |
| 171 <repeat name="in_seqs"> | |
| 172 <param name="label" value="badheader2"/> | |
| 173 <param name="fasta" value="bh2.fasta.gz"/> | |
| 174 </repeat> | |
| 175 <output name="out_hal"> | |
| 176 <assert_contents> | |
| 177 <has_size value="3382274" delta="200000"/> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 <output name="out_gfa"> | |
| 181 <assert_contents> | |
| 182 <has_size value="764748" delta="200000"/> | |
| 183 </assert_contents> | |
| 184 </output> | |
| 185 </test> | |
| 186 </tests> | |
| 187 <help><![CDATA[ | |
| 188 | |
| 189 .. class:: infomark | |
| 190 | |
| 191 **What it does** | |
| 192 | |
| 193 `Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a | |
| 194 reference-free whole-genome multiple alignment program. It can be used | |
| 195 to progressively align a large number of genomes. | |
| 196 | |
| 197 ----- | |
| 198 | |
| 199 .. class:: infomark | |
| 200 | |
| 201 **Usage** | |
| 202 | |
| 203 **Between-species mode (Progressive Cactus)** | |
| 204 | |
| 205 If you are aligning genomes from **multiple species**, you need to | |
| 206 provide a guide tree in Newick format. Cactus uses the guide tree to | |
| 207 progressively align genomes, meaning that it doesn’t need to align all | |
| 208 possible pairs of genomes. | |
| 209 | |
| 210 A Newick-formatted tree for human, chimp, gorilla and orangutan genomes looks like | |
| 211 this: | |
| 212 | |
| 213 :: | |
| 214 | |
| 215 (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831); | |
| 216 | |
| 217 The numbers are the branch lengths. | |
| 218 | |
| 219 **Within-species mode (Minigraph-Cactus)** | |
| 220 | |
| 221 You can also run Cactus in `pangenome | |
| 222 mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ | |
| 223 to align genomes of multiple individuals from the **same species**. In | |
| 224 this mode you will not use a guide tree. Cactus will use | |
| 225 `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of | |
| 226 the input genomes and then use the graph to order the alignments. To use | |
| 227 pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ | |
| 228 dropdown. | |
| 229 | |
| 230 Unlike Between-species mode, Within-species mode depends on a predetermined reference genome. Enter the label of one of your input genomes in the ‘Reference genome’ field to select it. | |
| 231 | |
| 232 ----- | |
| 233 | |
| 234 .. class:: infomark | |
| 235 | |
| 236 **Input** | |
| 237 | |
| 238 The developers recommend soft-masking your genomes with RepeatMasker | |
| 239 before running Cactus. RepeatMasker is available on Galaxy. | |
| 240 | |
| 241 If you’re using Between-species mode, you need to provide labels for the | |
| 242 fasta files that match the leaves on the guide tree. In the example | |
| 243 above, you would use the label ‘human’ for the human fasta file. | |
| 244 | |
| 245 ----- | |
| 246 | |
| 247 .. class:: infomark | |
| 248 | |
| 249 **Output** | |
| 250 | |
| 251 The main output of Cactus is in `HAL | |
| 252 format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__. | |
| 253 You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to | |
| 254 convert the Cactus output to a VG or Multiple Alignment Format (MAF) | |
| 255 file. | |
| 256 | |
| 257 | |
| 258 ]]></help> | |
| 259 <expand macro="citations"/> | |
| 260 </tool> |
