comparison masurca.xml @ 0:3f13e9565679 draft

Uploaded
author dnbenso
date Mon, 24 Jan 2022 00:00:38 +0000
parents
children 1808eaa9d699
comparison
equal deleted inserted replaced
-1:000000000000 0:3f13e9565679
1 <tool id="masurca" name="MaSuRCA" version="@TOOL_VERSION@+galaxy0">
2 <description>The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit with config</description>
3 <macros> <!-- @TOOL_VERSION@ is the single version token; it is referenced by the tool version attribute and by the package requirement -->
4 <token name="@TOOL_VERSION@">4.0.6</token>
5 </macros>
6 <requirements> <!-- Dependency: the masurca package, pinned to the @TOOL_VERSION@ token -->
7 <requirement type="package" version="@TOOL_VERSION@">masurca</requirement>
8 </requirements>
9 <!-- Copies the bundled default-masurca-config to config.txt, fills its placeholders with sed from the selected inputs and parameters, then runs masurca on it followed by bash assemble.sh. A paired collection is read through its .forward and .reverse members, and the reference dataset through its enclosing reference_input conditional. --> <command detect_errors="exit_code"><![CDATA[
10 cp '$__tool_directory__/default-masurca-config' config.txt &&
11 #if $nanopore_input.np_input == "Yes":
12 #if $pacbio_input.pb_input == "Yes":
13 cat '$nanopore_input.nano' '$pacbio_input.pacbio' > long.fastq.gz &&
14 #else:
15 ln -s '$nanopore_input.nano' long.fastq.gz &&
16 #end if
17 sed -i 's|#NANOPORE=INPUTREADLONG|NANOPORE=long.fastq.gz|' config.txt &&
18 #elif $pacbio_input.pb_input == "Yes":
19 ln -s '$pacbio_input.pacbio' long.fastq.gz &&
20 sed -i 's|#PACBIO=INPUTREADLONG|PACBIO=long.fastq.gz|' config.txt &&
21 #end if
22 #if str( $illumina_input.input_type ) == "single"
23 ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz &&
24 sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt &&
25 #elif str( $illumina_input.input_type ) == "paired"
26 ln -s '$illumina_input.fastq_input1' ill_1.fastq.gz &&
27 sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt &&
28 ln -s '$illumina_input.fastq_input2' ill_2.fastq.gz &&
29 sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt &&
30 #elif str( $illumina_input.input_type ) == "paired_collection"
31 ln -s '$illumina_input.fastq_input1.forward' ill_1.fastq.gz &&
32 sed -i 's|INPUTREAD1|ill_1.fastq.gz|' config.txt &&
33 ln -s '$illumina_input.fastq_input1.reverse' ill_2.fastq.gz &&
34 sed -i 's|INPUTREAD2|ill_2.fastq.gz|' config.txt &&
35 #end if
36 #if $reference_input.ref_input == "Yes":
37 sed -i 's|#REFERENCE=REF|REFERENCE=$reference_input.ref|' config.txt &&
38 #end if
39 sed -i 's|GALAXY_SLOTS|'\${GALAXY_SLOTS:-8}'|' config.txt &&
40 sed -i 's|MEAN|$mean|' config.txt &&
41 sed -i 's|STDDEV|$stddev|' config.txt &&
42 sed -i 's|JELLYFISHSIZE|$jfsize|' config.txt &&
43 sed -i 's|USE_LINKING_MATES = 0|USE_LINKING_MATES = $lnkmts|' config.txt &&
44 sed -i 's|FLYE_ASSEMBLY=0|FLYE_ASSEMBLY=$flye|' config.txt &&
45 masurca config.txt &&
46 bash assemble.sh
47 ]]></command>
48 <inputs> <!-- Tool form: Illumina reads (single, paired or paired collection), library insert statistics, optional Nanopore and PacBio long reads, optional reference, and assembler switches substituted into config.txt by the command template -->
49 <conditional name="illumina_input">
50 <param name="input_type" type="select" label="Illumina reads" help="Select between single, paired and paired collection">
51 <option value="single">Single</option>
52 <option value="paired">Paired</option>
53 <option value="paired_collection">Paired Collection</option>
54 </param>
55 <when value="single">
56 <param type="data" name="fastq_input1" format="fastqsanger,fastqsanger.gz"
57 label="Select unpaired reads" help="Specify dataset with unpaired reads"/>
58 </when>
59 <when value="paired">
60 <param type="data" name="fastq_input1" format="fastqsanger,fastqsanger.gz"
61 label="Select first set of reads" help="Specify dataset with forward reads"/>
62 <param type="data" name="fastq_input2" format="fastqsanger,fastqsanger.gz"
63 label="Select second set of reads" help="Specify dataset with reverse reads"/>
64 </when>
65 <when value="paired_collection">
66 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" />
67 </when>
68 </conditional>
69 <param type="integer" name="mean" value="500" label="Mean size" help="Library insert average length" />
70 <param type="integer" name="stddev" value="50" label="Standard deviation"
71 help="Library insert standard deviation - if not known, set it to approximately 15% of the mean" />
72 <conditional name="nanopore_input">
73 <param name="np_input" type="select" label="Use Nanopore long reads" help="Optional Nanopore reads must be in a single fasta or fastq file">
74 <option value="No" selected="true">No</option>
75 <option value="Yes">Yes</option>
76 </param>
77 <when value="No"/>
78 <when value="Yes">
79 <param type="data" name="nano" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="nanopore reads" />
80 </when>
81 </conditional>
82 <conditional name="pacbio_input">
83 <param name="pb_input" type="select" label="Use Pacbio long reads" help="Optional Pacbio reads must be in a single fasta or fastq file">
84 <option value="No" selected="true">No</option>
85 <option value="Yes">Yes</option>
86 </param>
87 <when value="No"/>
88 <when value="Yes">
89 <param type="data" name="pacbio" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="pacbio reads" />
90 </when>
91 </conditional>
92 <conditional name="reference_input">
93 <param name="ref_input" type="select" label="Synteny-assisted assembly" help="Concatenate all reference genomes into one reference.fa; works for Illumina-only data">
94 <option value="No" selected="true">No</option>
95 <option value="Yes">Yes</option>
96 </param>
97 <when value="No"/>
98 <when value="Yes">
99 <param type="data" name="ref" format="fasta,fasta.gz" label="Reference" />
100 </when>
101 </conditional>
102 <param type="integer" name="jfsize" value="20000000" label="Jellyfish hash size" help="Set this to about 10x the genome size" />
103 <param type="boolean" name="flye" truevalue="1" falsevalue="0" label="Set this to use Flye assembler for final assembly of corrected mega-reads"
104 help="If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files). DO NOT use if you have less than 15x coverage by long read" />
105 <param type="boolean" name="lnkmts" truevalue="1" falsevalue="0" label="Include Linking Mates"
106 help="Most of the paired end reads end up in the same super read and thus are not passed to the assembler. Those that do not end up in the same super read are called “linking mates”. The best assembly results are achieved by setting this parameter to 1 for Illumina-only assemblies. If you have more than 2x coverage by long reads, set this to 0." />
107 </inputs>
108 <outputs> <!-- Output datasets, each taken from a path in the job working directory via from_work_dir. NOTE(review): three of the from_work_dir paths below contain a '*' wildcard; confirm that Galaxy expands globs there, otherwise these outputs will not be collected. -->
109 <data name="superReads" format="fasta" from_work_dir="superReadSequences.named.fasta" label="${tool.name} on ${on_string}: named_superReads" />
110 <data name="scaffold_prm" format="fasta" from_work_dir="CA.mr.*/primary.genome.scf.fasta" label="${tool.name} on ${on_string}: primary_genome"> <!-- filtered on the flye boolean being False -->
111 <filter>flye == False</filter>
112 </data>
113 <data name="scaffold_alt" format="fasta" from_work_dir="CA.mr.*/alternative.genome.scf.fasta" label="${tool.name} on ${on_string}: alternative_genome"> <!-- filtered on the flye boolean being False -->
114 <filter>flye == False</filter>
115 </data>
116 <data name="flye_assembly" format="fasta" from_work_dir="flye.mr.*/assembly.fasta" label="${tool.name} on ${on_string}: flye_assembly"> <!-- filtered on the flye boolean being True -->
117 <filter>flye == True</filter>
118 </data>
119 </outputs>
120 <tests> <!-- One functional test; it does not exercise the single, paired_collection or PacBio branches -->
121 <test> <!-- Paired Illumina reads plus Nanopore reads and a reference, with flye set to 1 and lnkmts set to 0; only the superReads output is asserted -->
122 <conditional name="illumina_input">
123 <param name="input_type" value="paired" />
124 <param name="fastq_input1" value="illumina_reads_1.fastq"/>
125 <param name="fastq_input2" value="illumina_reads_2.fastq"/>
126 </conditional>
127 <conditional name="nanopore_input">
128 <param name="np_input" value="Yes" />
129 <param name="nano" value="nanopore_reads.fastq" />
130 </conditional>
131 <conditional name="pacbio_input">
132 <param name="pb_input" value="No" />
133 </conditional>
134 <conditional name="reference_input">
135 <param name="ref_input" value="Yes" />
136 <param name="ref" value="reference_genome.fasta" />
137 </conditional>
138 <param name="mean" value="500" />
139 <param name="stddev" value="50" />
140 <param name="jfsize" value="80349460" />
141 <param name="flye" value="1" />
142 <param name="lnkmts" value="0" />
143 <output name="superReads" ftype="fasta">
144 <assert_contents> <!-- passes when some line of the output begins with this sequence -->
145 <has_line_matching expression="^GAAAGCCGTGGCTTGGAACGGTGCTGATTGATCCGGC.*"/>
146 </assert_contents>
147 </output>
148 </test>
149 </tests>
150 <!-- User-facing help text for the tool form --> <help><![CDATA[
151
152 This implementation of MaSuRCA uses a config file for more complicated
153 assemblies and to change default settings. Illumina reads (mandatory)
154 and long reads from PACBIO or Oxford Nanopore or both can be included.
155
156 ]]></help>
157 <citations> <!-- Publication cited for this tool, given as a DOI -->
158 <citation type="doi">10.1093/bioinformatics/btt476</citation>
159 </citations>
160 </tool>