cactus_cactus: cactus_cactus.xml comparison

comparison cactus_cactus.xml @ 0:85f68b344286 draft

"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8f8363625623f2ff3f04d12d227673ac134eba24"

author	galaxy-australia
date	Mon, 04 Apr 2022 06:27:44 +0000
parents
children	1bc1199f0ff4

comparison

equal deleted inserted replaced

--1:000000000000
+:85f68b344286
+<tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+<description>whole-genome multiple sequence alignment.</description>
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="xrefs"/>
+<expand macro="requirements"/>
+<command detect_errors="exit_code"><![CDATA[
+## Overview of the generated shell command:
+##   1. (within-species only) reject FASTA files whose headers contain whitespace
+##   2. write seqfile.txt: the guide tree (between-species only), then one
+##      "label path" line per genome, each dataset being symlinked as label.ext
+##   3. run minigraph + cactus-graphmap + cactus-align (within-species)
+##      or plain cactus (between-species); both branches write alignment.hal
+## Every dataset path and derived file name is single-quoted so that the shell
+## always receives it as exactly one argument.
+
+## Check the FASTA headers
+## This is only necessary in pangenome mode
+#if $aln_mode.aln_mode_select == 'intraspecies':
+#for $seq in $in_seqs:
+if
+## pick the grep flavour that can read the dataset (gzipped or plain)
+#if $seq.fasta.is_of_type('fasta.gz'):
+zgrep
+#else
+grep
+#end if
+"^>" '$seq.fasta' | grep -q "[[:space:]]" ; then
+## diagnostics go to stderr so they are reported as the job's error output
+echo "Error parsing input FASTA." >&2 ;
+echo "Pangenome mode fails if there are spaces in the header." >&2 ;
+echo "Please remove them with the NormalizeFasta tool." >&2 ;
+exit 1
+; fi &&
+#end for
+#end if
+## Set up seqfile
+## In between-species mode the guide tree is written to seqfile.txt first
+#if $aln_mode.aln_mode_select == 'interspecies':
+cat '$aln_mode.in_tree' >> seqfile.txt &&
+#end if
+## seq_line accumulates the (quoted) symlink names for the minigraph call
+#set seq_line = ''
+#for $seq in $in_seqs:
+#set seq_fn = str($seq.label) + '.' + $seq.fasta.ext
+ln -s '$seq.fasta' '$seq_fn' &&
+printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt
+#set seq_line += "'" + $seq_fn + "' "
+&&
+#end for
+## Run cactus
+#if $aln_mode.aln_mode_select == 'intraspecies':
+## If we're doing a pangenome, we need to run the steps manually
+minigraph -xggs
+-t \${GALAXY_SLOTS:-4}
+$seq_line
+> pangenome.gfa
+&&
+cactus-graphmap
+--maxCores  \${GALAXY_SLOTS:-4}
+--maxMemory \${GALAXY_MEMORY_MB:-8192}M
+./jobStore
+./seqfile.txt
+pangenome.gfa
+pangenome.paf
+--outputFasta pangenome.gfa.fa
+--binariesMode local
+--workDir ./
+&&
+cactus-align
+--maxCores  \${GALAXY_SLOTS:-4}
+--maxMemory \${GALAXY_MEMORY_MB:-8192}M
+./jobStore
+./seqfile.txt
+pangenome.paf
+alignment.hal
+--pangenome
+--pafInput
+--binariesMode local
+--workDir ./
+#else if $aln_mode.aln_mode_select == 'interspecies':
+## Run cactus normally
+cactus
+--maxCores  \${GALAXY_SLOTS:-4}
+--maxMemory \${GALAXY_MEMORY_MB:-8192}M
+jobStore seqfile.txt alignment.hal
+--binariesMode local
+--workDir ./
+#end if
+]]></command>
+<inputs>
+    <!-- Alignment mode switch: between-species runs need a guide tree,
+         within-species (pangenome) runs take no extra parameters. -->
+    <conditional name="aln_mode">
+        <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'">
+            <option value="interspecies" selected="true">Between-species</option>
+            <option value="intraspecies">Within-species</option>
+        </param>
+        <when value="interspecies">
+            <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" />
+        </when>
+        <when value="intraspecies">
+        </when>
+    </conditional>
+    <!-- One entry per genome: a label plus the FASTA dataset it names. -->
+    <repeat name="in_seqs" title="Input genome">
+        <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree.">
+            <!-- The label becomes a seqfile column and part of a file name in
+                 the command, so empty or whitespace-containing labels are
+                 rejected up front instead of failing inside the job. -->
+            <validator type="regex" message="The genome label must not be empty and must not contain whitespace.">^\S+$</validator>
+        </param>
+        <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/>
+    </repeat>
+    <!-- add an option for root -->
+    <!-- root mr  -->
+</inputs>
+<outputs>
+    <!-- Single output: the alignment.hal file left in the job working
+         directory by the command, stored with the h5 datatype. -->
+    <data name="out_hal"
+          format="h5"
+          from_work_dir="alignment.hal"
+          label="${tool.name} on ${on_string} (HAL file)"/>
+</outputs>
+<tests>
+    <!-- 1. Between-species mode: five simulated genomes plus a guide tree -->
+    <test expect_num_outputs="1">
+        <conditional name="aln_mode">
+            <param name="aln_mode_select" value="interspecies"/>
+            <param name="in_tree" value="test_tree.nhx"/>
+        </conditional>
+        <repeat name="in_seqs">
+            <param name="label" value="simCow_chr6"/>
+            <param name="fasta" value="simCow_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simDog_chr6"/>
+            <param name="fasta" value="simDog_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simHuman_chr6"/>
+            <param name="fasta" value="simHuman_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simMouse_chr6"/>
+            <param name="fasta" value="simMouse_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simRat_chr6"/>
+            <param name="fasta" value="simRat_chr6.fasta"/>
+        </repeat>
+        <!-- The HAL output is only checked by size, within a tolerance -->
+        <output name="out_hal">
+            <assert_contents>
+                <has_size value="5272354" delta="200000"/>
+            </assert_contents>
+        </output>
+    </test>
+    <!-- 2. Within-species mode: same five genomes, no guide tree -->
+    <test expect_num_outputs="1">
+        <conditional name="aln_mode">
+            <param name="aln_mode_select" value="intraspecies"/>
+        </conditional>
+        <repeat name="in_seqs">
+            <param name="label" value="simCow_chr6"/>
+            <param name="fasta" value="simCow_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simDog_chr6"/>
+            <param name="fasta" value="simDog_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simHuman_chr6"/>
+            <param name="fasta" value="simHuman_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simMouse_chr6"/>
+            <param name="fasta" value="simMouse_chr6.fasta"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="simRat_chr6"/>
+            <param name="fasta" value="simRat_chr6.fasta"/>
+        </repeat>
+        <output name="out_hal">
+            <assert_contents>
+                <has_size value="2119332" delta="200000"/>
+            </assert_contents>
+        </output>
+    </test>
+    <!-- 3. Within-species mode with gzip-compressed FASTA inputs -->
+    <test expect_num_outputs="1">
+        <conditional name="aln_mode">
+            <param name="aln_mode_select" value="intraspecies"/>
+        </conditional>
+        <repeat name="in_seqs">
+            <param name="label" value="germ_25"/>
+            <param name="fasta" value="germ_25.fasta.gz"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="vulg_25"/>
+            <param name="fasta" value="vulg_25.fasta.gz"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="pens_25"/>
+            <param name="fasta" value="pens_25.fasta.gz"/>
+        </repeat>
+        <output name="out_hal">
+            <assert_contents>
+                <has_size value="7204260" delta="200000"/>
+            </assert_contents>
+        </output>
+    </test>
+    <!-- 4. FASTA header check: the job is expected to fail with exit code 1 -->
+    <test expect_exit_code="1" expect_failure="true">
+        <conditional name="aln_mode">
+            <param name="aln_mode_select" value="intraspecies"/>
+        </conditional>
+        <repeat name="in_seqs">
+            <param name="label" value="badheader1"/>
+            <param name="fasta" value="bh1.fasta.gz"/>
+        </repeat>
+        <repeat name="in_seqs">
+            <param name="label" value="badheader2"/>
+            <param name="fasta" value="bh2.fasta.gz"/>
+        </repeat>
+    </test>
+</tests>
+<help><![CDATA[
+**What it does**
+`Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a
+reference-free whole-genome multiple alignment program. It can be used
+to progressively align a large number of genomes.
+**Usage**
+**Between-species mode**
+If you are aligning genomes from **multiple species**, you need to
+provide a guide tree in Newick format. Cactus uses the guide tree to
+progressively align genomes, meaning that it doesn’t need to align all
+possible pairs of genomes.
+A Newick-formatted tree for human, chimp, gorilla and orangutan genomes
+looks like this:
+::
+(((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831);
+The numbers are the branch lengths.
+**Beta: Within-species mode**
+You can also run Cactus in `pangenome
+mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__
+to align genomes of multiple individuals from the **same species**. In
+this mode you will not use a guide tree. Cactus will use
+`minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of
+the input genomes and then use the graph to order the alignments. To use
+pangenome mode, select ‘Within-species’ in the ‘Alignment mode’
+dropdown.
+⚠️ To use pangenome mode, you will have to remove spaces from the headers in your FASTA file.
+You can do this with the NormalizeFasta tool.
+**Input**
+The developers recommend soft-masking your genomes with RepeatMasker
+before running Cactus. RepeatMasker is available on Galaxy.
+If you’re using Between-species mode, you need to provide labels for the
+fasta files that match the leaves on the guide tree. In the example
+above, you would use the label ‘human’ for the human fasta file.
+**Output**
+The main output of Cactus is in `HAL
+format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__.
+You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to
+convert the Cactus output to a VG or Multiple Alignment Format (MAF)
+file.
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > galaxy-australia > cactus_cactus

comparison cactus_cactus.xml @ 0:85f68b344286 draft