Mercurial > repos > galaxy-australia > cactus_cactus
diff cactus_cactus.xml @ 4:3c8227556fdc draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 2b4c7c63b0e4a1f730794a4a5825bce29ee2eb25
author | galaxy-australia |
---|---|
date | Wed, 09 Nov 2022 03:14:17 +0000 |
parents | 9422c5a87ee2 |
children | 48c13389050d |
line wrap: on
line diff
--- a/cactus_cactus.xml Tue Sep 20 05:38:05 2022 +0000 +++ b/cactus_cactus.xml Wed Nov 09 03:14:17 2022 +0000 @@ -6,17 +6,17 @@ <expand macro="xrefs"/> <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ + export TMPDIR=\${_GALAXY_JOB_TMP_DIR} && + ## Set up seqfile #if $aln_mode.aln_mode_select == 'interspecies': cat $aln_mode.in_tree >> seqfile.txt && #end if - #set seq_line = '' #for $seq in $in_seqs: #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext ln -s '$seq.fasta' '$seq_fn' && printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt - #set seq_line += $seq_fn + ' ' && #end for @@ -24,40 +24,61 @@ #if $aln_mode.aln_mode_select == 'intraspecies': ## If we're doing a pangenome, we need to run the steps manually - minigraph -xggs - -t \${GALAXY_SLOTS:-4} - $seq_line - > pangenome.gfa + cactus-minigraph + --binariesMode local + --mapCores \${GALAXY_SLOTS:-4} + --maxMemory \${GALAXY_MEMORY_MB:-8192}M + --reference $aln_mode.ref_level + --workDir ./ + ./jobStore + ./seqfile.txt + pangenome.gfa && cactus-graphmap + --binariesMode local --maxCores \${GALAXY_SLOTS:-4} --maxMemory \${GALAXY_MEMORY_MB:-8192}M + --outputFasta pangenome.gfa.fa + --reference $aln_mode.ref_level + --workDir ./ ./jobStore ./seqfile.txt pangenome.gfa pangenome.paf - --outputFasta pangenome.gfa.fa - --binariesMode local - --workDir ./ && cactus-align + --binariesMode local --maxCores \${GALAXY_SLOTS:-4} --maxMemory \${GALAXY_MEMORY_MB:-8192}M + --outVG + --pangenome + --reference $aln_mode.ref_level + --workDir ./ ./jobStore ./seqfile.txt pangenome.paf alignment.hal - --pangenome + && + cactus-graphmap-join --binariesMode local - --workDir ./ + --gfaffix + --giraffe + --maxCores \${GALAXY_SLOTS:-4} + --maxMemory \${GALAXY_MEMORY_MB:-8192}M + --outDir ./ + --outName alignment + --reference $aln_mode.ref_level + --vg alignment.vg + --wlineSep "." 
+ ./jobStore #else if $aln_mode.aln_mode_select == 'interspecies': ## Run cactus normally cactus + --binariesMode local --maxCores \${GALAXY_SLOTS:-4} --maxMemory \${GALAXY_MEMORY_MB:-8192}M + --workDir ./ jobStore seqfile.txt alignment.hal - --binariesMode local - --workDir ./ #end if ]]></command> @@ -71,6 +92,7 @@ <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" /> </when> <when value="intraspecies"> + <param name="ref_level" type="text" value="" label="Reference genome" help="Pangenomes from Minigraph-Cactus depend on a predetermined reference genome. Specify one of the Input Genomes as the reference genome. This must match the label used in 'Genome Label'." /> </when> </conditional> <repeat name="in_seqs" title="Input genome"> @@ -83,10 +105,13 @@ </inputs> <outputs> <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" /> + <data name="out_gfa" format="gfa2.gz" from_work_dir="alignment.gfa.gz" label="${tool.name} on ${on_string} (GFA file)" > + <filter>aln_mode[aln_mode_select] == 'intraspecies'</filter> + </data> </outputs> <tests> <!-- test interspecies mode --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <conditional name="aln_mode"> <param name="aln_mode_select" value="interspecies"/> <param name="in_tree" value="test_tree.nhx"/> @@ -118,9 +143,10 @@ </output> </test> <!-- within-species mode --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <conditional name="aln_mode"> <param name="aln_mode_select" value="intraspecies"/> + <param name="ref_level" value="simCow_chr6"/> </conditional> <repeat name="in_seqs"> <param name="label" value="simCow_chr6"/> @@ -144,14 +170,20 @@ </repeat> <output name="out_hal"> <assert_contents> - <has_size value="1349620" delta="200000" /> + <has_size value="2088959" delta="200000" /> + 
</assert_contents> + </output> + <output name="out_gfa"> + <assert_contents> + <has_size value="173000" delta="200000" /> </assert_contents> </output> </test> <!-- compressed input --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <conditional name="aln_mode"> <param name="aln_mode_select" value="intraspecies"/> + <param name="ref_level" value="germ_25"/> </conditional> <repeat name="in_seqs"> <param name="label" value="germ_25"/> @@ -170,11 +202,17 @@ <has_size value="7420424" delta="200000" /> </assert_contents> </output> + <output name="out_gfa"> + <assert_contents> + <has_size value="6710429" delta="200000" /> + </assert_contents> + </output> </test> <!-- FASTA header with spaces (used to fail) --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <conditional name="aln_mode"> <param name="aln_mode_select" value="intraspecies"/> + <param name="ref_level" value="badheader1"/> </conditional> <repeat name="in_seqs"> <param name="label" value="badheader1"/> @@ -189,6 +227,11 @@ <has_size value="3382274" delta="200000" /> </assert_contents> </output> + <output name="out_gfa"> + <assert_contents> + <has_size value="764748" delta="200000" /> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ @@ -200,7 +243,7 @@ **Usage** -**Between-species mode** +**Between-species mode (Progressive Cactus)** If you are aligning genomes from **multiple species**, you need to provide a guide tree in Newick format. Cactus uses the guide tree to @@ -216,7 +259,7 @@ The numbers are the branch lengths. -**Beta: Within-species mode** +**Within-species mode (Minigraph-Cactus)** You can also run Cactus in `pangenome mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ @@ -227,6 +270,8 @@ pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ dropdown. 
+Unlike Between-species mode, Within-species mode depends on a predetermined reference genome. + **Input** The developers recommend soft-masking your genomes with RepeatMasker