# HG changeset patch # User dnbenso # Date 1642982438 0 # Node ID 3f13e956567905a2eb1ffc41c4094b5fb6ef3629 Uploaded diff -r 000000000000 -r 3f13e9565679 .default-masurca-config.swp Binary file .default-masurca-config.swp has changed diff -r 000000000000 -r 3f13e9565679 default-masurca-config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/default-masurca-config Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,63 @@ +DATA +#Illumina paired end reads supplied as <two-character prefix> <fragment mean> <fragment stdev> <forward_reads> <reverse_reads> +#if single-end, do not specify <reverse_reads> +#MUST HAVE Illumina paired end reads to use MaSuRCA +#PE= pe 500 50 /FULL_PATH/frag_1.fastq /FULL_PATH/frag_2.fastq +PE= pe MEAN STDDEV INPUTREAD1 INPUTREAD2 +#Illumina mate pair reads supplied as <two-character prefix> <fragment mean> <fragment stdev> <forward_reads> <reverse_reads> +#JUMP= sh 3600 200 /FULL_PATH/short_1.fastq /FULL_PATH/short_2.fastq +#pacbio OR nanopore reads must be in a single fasta or fastq file with absolute path, can be gzipped +#if you have both types of reads supply them both as NANOPORE type +#PACBIO=/FULL_PATH/pacbio.fa +#PACBIO=INPUTREADLONG +#NANOPORE=/FULL_PATH/nanopore.fa +#NANOPORE=INPUTREADLONG +#Other reads (Sanger, 454, etc) one frg file, concatenate your frg files into one if you have many +#OTHER=/FULL_PATH/file.frg +#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data +#REFERENCE=REF +END + +PARAMETERS +#PLEASE READ all comments to essential parameters below, and set the parameters according to your project +#set this to 1 if your Illumina jumping library reads are shorter than 100bp +EXTEND_JUMP_READS=0 +#this is the k-mer size for the deBruijn graph; values between 25 and 127 are supported, auto will compute the optimal size based on the read data and GC content +GRAPH_KMER_SIZE = auto +#set this to 1 for all Illumina-only assemblies +#set this to 0 if you have more than 15x coverage by long reads (Pacbio or Nanopore) or any other long reads/mate pairs (Illumina MP, Sanger, 454, etc) +USE_LINKING_MATES = 0 +#specifies whether to run the assembly on the grid +USE_GRID=0 
+#specifies grid engine to use SGE or SLURM +GRID_ENGINE=SLURM +#specifies queue (for SGE) or partition (for SLURM) to use when running on the grid MANDATORY +GRID_QUEUE=defq +#batch size in the amount of long read sequence for each batch on the grid +GRID_BATCH_SIZE=500000000 +#use at most this much coverage by the longest Pacbio or Nanopore reads, discard the rest of the reads +#can increase this to 30 or 35 if your reads are short (N50<7000bp) +LHE_COVERAGE=25 +#set to 0 (default) to do two passes of mega-reads for slower, but higher quality assembly, otherwise set to 1 +MEGA_READS_ONE_PASS=0 +#this parameter is useful if you have too many Illumina jumping library mates. Typically set it to 60 for bacteria and 300 for the other organisms +LIMIT_JUMP_COVERAGE = 300 +#these are the additional parameters to Celera Assembler. do not worry about performance, number of processors or batch sizes -- these are computed automatically. +#CABOG ASSEMBLY ONLY: set cgwErrorRate=0.25 for bacteria and 0.1<=cgwErrorRate<=0.15 for other organisms. +CA_PARAMETERS = cgwErrorRate=0.15 +#CABOG ASSEMBLY ONLY: whether to attempt to close gaps in scaffolds with Illumina or long read data +CLOSE_GAPS=1 +#number of cpus to use, set this to the number of CPUs/threads per node you will be using +NUM_THREADS = GALAXY_SLOTS +#this is mandatory jellyfish hash size -- a safe value is estimated_genome_size*20 +JF_SIZE = JELLYFISHSIZE +#ILLUMINA ONLY. Set this to 1 to use SOAPdenovo contigging/scaffolding module. +#Assembly will be worse but will run faster. Useful for very large (>=8Gbp) genomes from Illumina-only data +SOAP_ASSEMBLY=0 +#If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files). +#Set this to 1 to use Flye assembler for final assembly of corrected mega-reads. +#A lot faster than CABOG, AND QUALITY IS THE SAME OR BETTER. +#Works well even when MEGA_READS_ONE_PASS is set to 1. 
+#DO NOT use if you have less than 15x coverage by long reads. +FLYE_ASSEMBLY=0 +END diff -r 000000000000 -r 3f13e9565679 masurca.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/masurca.xml Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,160 @@ + + The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit with config + + 4.0.6 + + + masurca + + long.fastq.gz && + #else: + ln -s '$nanopore_input.nano' long.fastq.gz && + #end if + sed -i 's|#NANOPORE=INPUTREADLONG|NANOPORE=long.fastq.gz|' config.txt && + #elif $pacbio_input.pb_input == "Yes": + ln -s '$pacbio_input.pacbio' long.fastq.gz && + sed -i 's|#PACBIO=INPUTREADLONG|PACBIO=long.fastq.gz|' config.txt && + #end if + #if str( $illumina_input.input_type ) == "single" + ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz && + sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt && + #elif str( $illumina_input.input_type ) == "paired" + ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz && + sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt && + ln -s '$illumina_input.fastq_input2' ill_2.fastq.gz && + sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt && + #elif str( $illumina_input.input_type ) == "paired_collection" + ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz && + sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt && + ln -s '$illumina_input.fastq_input2' ill_2.fastq.gz && + sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt && + #end if + #if $reference_input.ref_input == "Yes": + sed -i 's|#REFERENCE=REF|REFERENCE=$ref|' config.txt && + #end if + sed -i 's|GALAXY_SLOTS|'\${GALAXY_SLOTS:-8}'|' config.txt && + sed -i 's|MEAN|$mean|' config.txt && + sed -i 's|STDDEV|$stddev|' config.txt && + sed -i 's|JELLYFISHSIZE|$jfsize|' config.txt && + sed -i 's|USE_LINKING_MATES = 0|USE_LINKING_MATES = $lnkmts|' config.txt && + sed -i 's|FLYE_ASSEMBLY=0|FLYE_ASSEMBLY=$flye|' config.txt && + masurca config.txt && + bash assemble.sh + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + flye == False + + + flye == False + + + flye == True + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btt476 + + diff -r 000000000000 -r 3f13e9565679 test-data/illumina_reads_1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illumina_reads_1.fastq Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,1 @@ +wget https://usegalaxy.org.au/datasets/5cec05905990af26/display?to_ext=fastqsanger -O illumina_reads_1.fastq diff -r 000000000000 -r 3f13e9565679 test-data/illumina_reads_2.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illumina_reads_2.fastq Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,1 @@ +wget https://usegalaxy.org.au/datasets/abab1e6005ad1907/display?to_ext=fastqsanger -O illumina_reads_2.fastq diff -r 000000000000 -r 3f13e9565679 test-data/nanopore_reads.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/nanopore_reads.fastq Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,1 @@ +wget https://usegalaxy.org.au/datasets/0af5c63726eb74ba/display?to_ext=fastq -O nanopore_reads.fastq diff -r 000000000000 -r 3f13e9565679 test-data/reference_genome.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference_genome.fasta Mon Jan 24 00:00:38 2022 +0000 @@ -0,0 +1,1 @@ +wget https://usegalaxy.org.au/datasets/a902098a85e91ce7/display?to_ext=fasta -O reference_genome.fasta