Mercurial > repos > iuc > cactus_cactus
diff cactus_cactus.xml @ 0:51c3c42bc644 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cactus commit 827619d22d2931d8fb34ed6844cfa91433e1ac2c
| author | iuc |
|---|---|
| date | Tue, 06 Feb 2024 00:30:39 +0000 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cactus_cactus.xml Tue Feb 06 00:30:39 2024 +0000
@@ -0,0 +1,260 @@
+<tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>whole-genome multiple sequence alignment</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+
+        ## Set up seqfile.txt: the guide tree first (interspecies only, followed by an explicit newline so a tree file without a trailing newline cannot swallow the first genome entry), then one label/path line per genome
+
+        #if $aln_mode.aln_mode_select == 'interspecies':
+            cat '$aln_mode.in_tree' >> seqfile.txt && printf '\n' >> seqfile.txt &&
+        #end if
+        #for $seq in $in_seqs:
+            #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext
+            ln -s '$seq.fasta' '$seq_fn' &&
+            printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt
+            &&
+        #end for
+
+        ## Run the aligner that matches the selected alignment mode; both branches leave the HAL output at alignment.full.hal
+
+        #if $aln_mode.aln_mode_select == 'intraspecies':
+            ## Run cactus-pangenome
+            ## --reference should be the first argument
+            ## https://github.com/ComparativeGenomicsToolkit/cactus/issues/1093#issuecomment-1620088688
+            cactus-pangenome
+                --reference '$aln_mode.ref_level'
+                --binariesMode local
+                --maxCores \${GALAXY_SLOTS:-4}
+                --maxMemory \${GALAXY_MEMORY_MB:-16384}M
+                --outDir ./
+                --outName alignment
+                jobStore
+                seqfile.txt
+        #else if $aln_mode.aln_mode_select == 'interspecies':
+            ## Run progressive cactus; the output HAL path is passed as the last positional argument
+            cactus
+                --binariesMode local
+                --maxCores \${GALAXY_SLOTS:-4}
+                --maxMemory \${GALAXY_MEMORY_MB:-16384}M
+                --workDir ./
+                jobStore
+                seqfile.txt
+                alignment.full.hal
+        #end if
+
+    ]]></command>
+    <inputs>
+        <conditional name="aln_mode">
+            <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'">
+                <option value="interspecies" selected="true">Between-species</option>
+                <option value="intraspecies">Within-species</option>
+            </param>
+            <when value="interspecies">
+                <param name="in_tree" type="data" format="newick,nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes"/>
+            </when>
+            <when value="intraspecies">
+                <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome Label'.">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.letters,string.digits">
+                            <add value="_"/>
+                        </valid>
+                    </sanitizer>
+                    <validator type="regex">^[0-9a-zA-Z_]+$</validator>
+                </param>
+            </when>
+        </conditional>
+        <repeat name="in_seqs" title="Input genome" min="1">
+            <param name="label" type="text" value="" label="Genome label" help="NO SPACES. Must match a label in the guide tree.">
+                <sanitizer invalid_char="">
+                    <valid initial="string.letters,string.digits">
+                        <add value="_"/>
+                    </valid>
+                </sanitizer>
+                <validator type="regex">^[0-9a-zA-Z_]+$</validator>
+            </param>
+            <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/>
+        </repeat>
+        <!-- TODO: add an option for choosing the root of the alignment -->
+        <!-- original note read "root mr"; presumably an example root argument, confirm against the Cactus command-line help -->
+    </inputs>
+    <outputs>
+        <data name="out_hal" format="h5" from_work_dir="alignment.full.hal" label="${tool.name} on ${on_string} (HAL file)">
+        </data>
+        <data name="out_gfa" format="gfa2.gz" from_work_dir="alignment.gfa.gz" label="${tool.name} on ${on_string} (GFA file)">
+            <filter>aln_mode['aln_mode_select'] == 'intraspecies'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- test interspecies mode -->
+        <test expect_num_outputs="1">
+            <conditional name="aln_mode">
+                <param name="aln_mode_select" value="interspecies"/>
+                <param name="in_tree" value="test_tree.nhx"/>
+            </conditional>
+            <repeat name="in_seqs">
+                <param name="label" value="simCow_chr6"/>
+                <param name="fasta" value="simCow_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simDog_chr6"/>
+                <param name="fasta" value="simDog_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simHuman_chr6"/>
+                <param name="fasta" value="simHuman_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simMouse_chr6"/>
+                <param name="fasta" value="simMouse_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simRat_chr6"/>
+                <param name="fasta" value="simRat_chr6.fasta"/>
+            </repeat>
+            <output name="out_hal">
+                <assert_contents>
+                    <has_size value="4472551" delta="200000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- within-species mode -->
+        <test expect_num_outputs="2">
+            <conditional name="aln_mode">
+                <param name="aln_mode_select" value="intraspecies"/>
+                <param name="ref_level" value="simCow_chr6"/>
+            </conditional>
+            <repeat name="in_seqs">
+                <param name="label" value="simCow_chr6"/>
+                <param name="fasta" value="simCow_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simDog_chr6"/>
+                <param name="fasta" value="simDog_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simHuman_chr6"/>
+                <param name="fasta" value="simHuman_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simMouse_chr6"/>
+                <param name="fasta" value="simMouse_chr6.fasta"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="simRat_chr6"/>
+                <param name="fasta" value="simRat_chr6.fasta"/>
+            </repeat>
+            <output name="out_hal">
+                <assert_contents>
+                    <has_size value="565214" delta="65214"/>
+                </assert_contents>
+            </output>
+            <output name="out_gfa">
+                <assert_contents>
+                    <has_size value="173000" delta="200000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- FASTA header with spaces (used to fail) -->
+        <test expect_num_outputs="2">
+            <conditional name="aln_mode">
+                <param name="aln_mode_select" value="intraspecies"/>
+                <param name="ref_level" value="badheader1"/>
+            </conditional>
+            <repeat name="in_seqs">
+                <param name="label" value="badheader1"/>
+                <param name="fasta" value="bh1.fasta.gz"/>
+            </repeat>
+            <repeat name="in_seqs">
+                <param name="label" value="badheader2"/>
+                <param name="fasta" value="bh2.fasta.gz"/>
+            </repeat>
+            <output name="out_hal">
+                <assert_contents>
+                    <has_size value="3382274" delta="200000"/>
+                </assert_contents>
+            </output>
+            <output name="out_gfa">
+                <assert_contents>
+                    <has_size value="764748" delta="200000"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+`Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a
+reference-free whole-genome multiple alignment program. It can be used
+to progressively align a large number of genomes.
+
+-----
+
+.. class:: infomark
+
+**Usage**
+
+**Between-species mode (Progressive Cactus)**
+
+If you are aligning genomes from **multiple species**, you need to
+provide a guide tree in Newick format. Cactus uses the guide tree to
+progressively align genomes, meaning that it doesn’t need to align all
+possible pairs of genomes.
+
+A Newick-formatted tree for human, chimp, gorilla and orangutan genomes looks like
+this:
+
+  ::
+
+    (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831);
+
+The numbers are the branch lengths.
+
+**Within-species mode (Minigraph-Cactus)**
+
+You can also run Cactus in `pangenome
+mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__
+to align genomes of multiple individuals from the **same species**. In
+this mode you will not use a guide tree. Cactus will use
+`minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of
+the input genomes and then use the graph to order the alignments. To use
+pangenome mode, select ‘Within-species’ in the ‘Alignment mode’
+dropdown.
+
+Unlike Between-species mode, Within-species mode depends on a predetermined reference genome.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+The developers recommend soft-masking your genomes with RepeatMasker
+before running Cactus. RepeatMasker is available on Galaxy.
+
+If you’re using Between-species mode, you need to provide labels for the
+fasta files that match the leaves on the guide tree. In the example
+above, you would use the label ‘human’ for the human fasta file.
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+The main output of Cactus is in `HAL
+format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__.
+You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to
+convert the Cactus output to a VG or Multiple Alignment Format (MAF)
+file.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
