diff masurca.xml @ 0:3f13e9565679 draft

Uploaded
author dnbenso
date Mon, 24 Jan 2022 00:00:38 +0000
parents
children 1808eaa9d699
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/masurca.xml	Mon Jan 24 00:00:38 2022 +0000
@@ -0,0 +1,160 @@
+<tool id="masurca" name="MaSuRCA" version="@TOOL_VERSION@+galaxy0">
+    <description>The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit with config</description>
+    <macros>
+        <token name="@TOOL_VERSION@">4.0.6</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">masurca</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        cp $__tool_directory__/default-masurca-config  config.txt &&
+        #if $nanopore_input.np_input == "Yes":
+            #if $pacbio_input.pb_input == "Yes":
+                cat '$nanopore_input.nano' '$pacbio_input.pacbio' > long.fastq.gz &&
+            #else:
+                ln -s '$nanopore_input.nano' long.fastq.gz &&
+            #end if
+            sed -i 's|#NANOPORE=INPUTREADLONG|NANOPORE=long.fastq.gz|' config.txt &&
+        #elif $pacbio_input.pb_input == "Yes":
+            ln -s '$pacbio_input.pacbio' long.fastq.gz &&
+            sed -i 's|#PACBIO=INPUTREADLONG|PACBIO=long.fastq.gz|' config.txt &&
+        #end if
+        #if str( $illumina_input.input_type ) == "single"
+            ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz &&
+            sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt &&
+        #elif str( $illumina_input.input_type ) == "paired"
+            ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz &&
+            sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt && 
+            ln -s '$illumina_input.fastq_input2' ill_2.fastq.gz &&
+            sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt &&
+        #elif str( $illumina_input.input_type ) == "paired_collection"
+            ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz &&
+            sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt && 
+            ln -s '$illumina_input.fastq_input2' ill_2.fastq.gz &&
+            sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt &&
+        #end if
+        #if $reference_input.ref_input == "Yes":
+            sed -i 's|#REFERENCE=REF|REFERENCE=$ref|' config.txt &&
+        #end if
+        sed -i 's|GALAXY_SLOTS|'\${GALAXY_SLOTS:-8}'|' config.txt &&
+        sed -i 's|MEAN|$mean|' config.txt &&
+        sed -i 's|STDDEV|$stddev|' config.txt &&
+        sed -i 's|JELLYFISHSIZE|$jfsize|' config.txt &&
+        sed -i 's|USE_LINKING_MATES = 0|USE_LINKING_MATES = $lnkmts|' config.txt &&
+        sed -i 's|FLYE_ASSEMBLY=0|FLYE_ASSEMBLY=$flye|' config.txt &&
+        masurca config.txt &&
+        bash assemble.sh
+    ]]></command>
+    <inputs>
+        <conditional name="illumina_input">
+            <param name="input_type" type="select" label="Paired-end reads" help="Select between paired and paired collection">
+                <option value="single">Single</option>
+                <option value="paired">Paired</option>
+                <option value="paired_collection">Paired Collection</option>
+            </param>
+            <when value="single">
+                <param type="data" name="fastq_input1" format="fastqsanger,fastqsanger.gz"
+                    label="Select unpaired reads" help="Specify dataset with unpaired reads"/>
+            </when>
+            <when value="paired">
+                <param type="data" name="fastq_input1" format="fastqsanger,fastqsanger.gz"
+                    label="Select first set of reads" help="Specify dataset with forward reads"/>
+                <param type="data" name="fastq_input2" format="fastqsanger,fastqsanger.gz"
+                    label="Select second set of reads" help="Specify dataset with reverse reads"/>
+            </when>
+            <when value="paired_collection">
+                <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" />
+            </when>
+        </conditional>
+        <param type="integer" name="mean" value="500" label="Mean size" help="Libarary insert average length" />
+        <param type="integer" name="stddev" value="50" label="Standard deviation"
+               help="Library insert standard deviation - if not known, set it to approximately 15% of the mean" />
+        <conditional name="nanopore_input">
+            <param name="np_input" type="select" label="Use Nanopore long reads" help="Optional Nanopore reads must be in a single fasta or fastq file">
+                <option value="No" selected="true">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="No"/>
+            <when value="Yes">
+                <param type="data" name="nano" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="nanopore reads" />
+            </when>
+        </conditional>
+        <conditional name="pacbio_input">
+            <param name="pb_input" type="select" label="Use Pacbio long reads" help="Optional Pacbio reads must be in a single fasta or fastq file">
+                <option value="No" selected="true">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="No"/>
+            <when value="Yes">
+                <param type="data" name="pacbio" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="pacbio reads" />
+            </when>
+        </conditional>
+        <conditional name="reference_input">
+            <param name="ref_input" type="select" label="Synteny-assisted assembly" help="Concatenate all reference genomes into one reference.fa; works for Illumina-only data">
+                <option value="No" selected="true">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="No"/>
+            <when value="Yes">
+                <param type="data" name="ref" format="fasta,fasta.gz" label="Reference" />
+            </when>
+        </conditional>
+        <param type="integer" name="jfsize" value="20000000" label="Jellyfish hash size" help="Set this to about 10x the genome size" />
+        <param type="boolean" name="flye" truevalue="1" falsevalue="0" label="Set this to use Flye assembler for final assembly of corrected mega-reads"
+            help="If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files). DO NOT use if you have less than 15x coverage by long read" />
+        <param type="boolean" name="lnkmts" truevalue="1" falsevalue="0" label="Include Linking Mates"
+            help="Most of the paired end reads end up in the same super read and thus are not passed to the assembler. Those that do not end up in the same super read are called ”linking mates” . The best assembly results are achieved by setting this parameter to 1 for Illumina-only assemblies. If you have more than 2x coverage by long reads, set this to 0." />
+    </inputs>
+    <outputs>
+        <data name="superReads" format="fasta" from_work_dir="superReadSequences.named.fasta" label="${tool.name} on ${on_string}: named_superReads" />
+        <data name="scaffold_prm" format="fasta" from_work_dir="CA.mr.*/primary.genome.scf.fasta" label="${tool.name} on ${on_string}: primary_genome">
+            <filter>flye == False</filter>
+        </data>
+        <data name="scaffold_alt" format="fasta" from_work_dir="CA.mr.*/alternative.genome.scf.fasta" label="${tool.name} on ${on_string}: alternative_genome">
+            <filter>flye == False</filter>
+        </data>
+        <data name="flye_assembly" format="fasta" from_work_dir="flye.mr.*/assembly.fasta" label="${tool.name} on ${on_string}: flye_assembly">
+            <filter>flye == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="illumina_input">
+                <param name="input_type" value="paired" />
+                <param name="fastq_input1" value="illumina_reads_1.fastq"/>
+                <param name="fastq_input2" value="illumina_reads_2.fastq"/>
+            </conditional>
+            <conditional name="nanopore_input">
+                <param name="np_input" value="Yes" />
+                <param name="nano" value="nanopore_reads.fastq" />
+            </conditional>
+            <conditional name="pacbio_input">
+                <param name="pb_input" value="No" />
+            </conditional>
+            <conditional name="reference_input">
+                <param name="ref_input" value="Yes" />
+                <param name="ref" value="reference_genome.fasta" />
+            </conditional>
+            <param name="mean" value="500" />
+            <param name="stddev" value="50" />
+            <param name="jfsize" value="80349460" />
+            <param name="flye" value="1" />
+            <param name="lnkmts" value="0" />
+            <output name="superReads" ftype="fasta">
+                <assert_contents>
+                   <has_line_matching expression="^GAAAGCCGTGGCTTGGAACGGTGCTGATTGATCCGGC.*"/> 
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+        This implementation of MaSuRCA uses a config file for more complicated
+        assemblies and to change default settings. Illumina reads (mandatory)
+        and long reads from PACBIO or Oxford Nanopore or both can be included.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btt476</citation>
+    </citations>
+</tool>