comparison masurca.xml @ 2:1808eaa9d699 draft

Uploaded
author dnbenso
date Tue, 25 Jan 2022 03:45:18 +0000
parents 3f13e9565679
children 784cb0a6cfdb
comparison
equal deleted inserted replaced
1:03edd7b30f66 2:1808eaa9d699
39 sed -i 's|GALAXY_SLOTS|'\${GALAXY_SLOTS:-8}'|' config.txt && 39 sed -i 's|GALAXY_SLOTS|'\${GALAXY_SLOTS:-8}'|' config.txt &&
40 sed -i 's|MEAN|$mean|' config.txt && 40 sed -i 's|MEAN|$mean|' config.txt &&
41 sed -i 's|STDDEV|$stddev|' config.txt && 41 sed -i 's|STDDEV|$stddev|' config.txt &&
42 sed -i 's|JELLYFISHSIZE|$jfsize|' config.txt && 42 sed -i 's|JELLYFISHSIZE|$jfsize|' config.txt &&
43 sed -i 's|USE_LINKING_MATES = 0|USE_LINKING_MATES = $lnkmts|' config.txt && 43 sed -i 's|USE_LINKING_MATES = 0|USE_LINKING_MATES = $lnkmts|' config.txt &&
44 sed -i 's|MEGA_READS_ONE_PASS=0|MEGA_READS_ONE_PASS=$mega_one_pass|' config.txt &&
44 sed -i 's|FLYE_ASSEMBLY=0|FLYE_ASSEMBLY=$flye|' config.txt && 45 sed -i 's|FLYE_ASSEMBLY=0|FLYE_ASSEMBLY=$flye|' config.txt &&
45 masurca config.txt && 46 masurca config.txt &&
46 bash assemble.sh 47 bash assemble.sh
47 ]]></command> 48 ]]></command>
48 <inputs> 49 <inputs>
98 <when value="Yes"> 99 <when value="Yes">
99 <param type="data" name="ref" format="fasta,fasta.gz" label="Reference" /> 100 <param type="data" name="ref" format="fasta,fasta.gz" label="Reference" />
100 </when> 101 </when>
101 </conditional> 102 </conditional>
102 <param type="integer" name="jfsize" value="20000000" label="Jellyfish hash size" help="Set this to about 10x the genome size" /> 103 <param type="integer" name="jfsize" value="20000000" label="Jellyfish hash size" help="Set this to about 10x the genome size" />
104 <param type="boolean" name="mega_one_pass" truevalue="1" falsevalue="0" label="MEGA_READS_ONE_PASS"
105 help="set to 0 (default) to do two passes of mega-reads for slower, but higher quality assembly, otherwise set to 1" />
103 <param type="boolean" name="flye" truevalue="1" falsevalue="0" label="Set this to use Flye assembler for final assembly of corrected mega-reads" 106 <param type="boolean" name="flye" truevalue="1" falsevalue="0" label="Set this to use Flye assembler for final assembly of corrected mega-reads"
104 help="If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files). DO NOT use if you have less than 15x coverage by long read" /> 107 help="If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files). DO NOT use if you have less than 15x coverage by long read" />
105 <param type="boolean" name="lnkmts" truevalue="1" falsevalue="0" label="Include Linking Mates" 108 <param type="boolean" name="lnkmts" truevalue="1" falsevalue="0" label="Include Linking Mates"
106 help="Most of the paired end reads end up in the same super read and thus are not passed to the assembler. Those that do not end up in the same super read are called ”linking mates” . The best assembly results are achieved by setting this parameter to 1 for Illumina-only assemblies. If you have more than 2x coverage by long reads, set this to 0." /> 109 help="Most of the paired end reads end up in the same super read and thus are not passed to the assembler. Those that do not end up in the same super read are called ”linking mates” . The best assembly results are achieved by setting this parameter to 1 for Illumina-only assemblies. If you have more than 2x coverage by long reads, set this to 0." />
107 </inputs> 110 </inputs>
117 <filter>flye == True</filter> 120 <filter>flye == True</filter>
118 </data> 121 </data>
119 </outputs> 122 </outputs>
120 <tests> 123 <tests>
121 <test> 124 <test>
125 <!--
126 The test files are too large for upload to a toolshed or git repo.
127 For convenience I've included download instructions in each file so
128 that if you are testing with planemo you can at least download the
129 data and perform a basic test. If anyone has any datasets less than
130 1 MB that can be used to complete an assembly please let me know.
131 -->
122 <conditional name="illumina_input"> 132 <conditional name="illumina_input">
123 <param name="input_type" value="paired" /> 133 <param name="input_type" value="paired" />
124 <param name="fastq_input1" value="illumina_reads_1.fastq"/> 134 <param name="fastq_input1" value="illumina_reads_1.fastq"/>
125 <param name="fastq_input2" value="illumina_reads_2.fastq"/> 135 <param name="fastq_input2" value="illumina_reads_2.fastq"/>
126 </conditional> 136 </conditional>
136 <param name="ref" value="reference_genome.fasta" /> 146 <param name="ref" value="reference_genome.fasta" />
137 </conditional> 147 </conditional>
138 <param name="mean" value="500" /> 148 <param name="mean" value="500" />
139 <param name="stddev" value="50" /> 149 <param name="stddev" value="50" />
140 <param name="jfsize" value="80349460" /> 150 <param name="jfsize" value="80349460" />
151 <param name="mega_one_pass" value="0" />
141 <param name="flye" value="1" /> 152 <param name="flye" value="1" />
142 <param name="lnkmts" value="0" /> 153 <param name="lnkmts" value="0" />
143 <output name="superReads" ftype="fasta"> 154 <output name="superReads" ftype="fasta">
144 <assert_contents> 155 <assert_contents>
145 <has_line_matching expression="^GAAAGCCGTGGCTTGGAACGGTGCTGATTGATCCGGC.*"/> 156 <has_line_matching expression="^GAAAGCCGTGGCTTGGAACGGTGCTGATTGATCCGGC.*"/>
147 </output> 158 </output>
148 </test> 159 </test>
149 </tests> 160 </tests>
150 <help><![CDATA[ 161 <help><![CDATA[
151 162
152 This implementation of MaSuRCA uses a config file for more complicated 163 **MaSuRCA**
153 assemblies and to change default settings. Illumina reads (mandatory)
154 and long reads from PACBIO or Oxford Nanopore or both can be included.
155 164
165 This implementation of MaSuRCA uses a config file for more complicated assemblies and to change default settings. Illumina reads (mandatory) and long reads from PACBIO or Oxford Nanopore or both can be included. It is written by `Aleksey Zimin`_ at Johns Hopkins University. Included below are relevant notes from MaSuRCA's `github page`_.
166
167 .. _`Aleksey Zimin`: https://github.com/alekseyzimin
168 .. _`github page`: https://github.com/alekseyzimin/masurca
169
170 -----
171
172 **Input data**
173
174 The following types of data are supported:
175
176 * Illumina paired end (or single end) reads -- MANDATORY. The mean and stdev parameters are the library insert average length and standard deviation. If the standard deviation is not known, set it to approximately 15% of the mean. If the second (reverse) read set is not available, do not specify it and just specify the forward reads. Files must be in fastq format and can be gzipped.
177 * PacBio/MinION data are supported. Note that you have to have 50x+ coverage in Illumina Paired End reads to use PacBio or Oxford Nanopore MinION data. Supply PacBio or MinION reads in a single fasta or fastq file (can be gzipped).
178
179 **Parameters**
180
181 The following parameter is mandatory:
182
183 * jellyfish hash size, set this to about 10x the genome size.
184
185 Optional parameters:
186
187 * linking mates: Most of the paired end reads end up in the same super read and thus are not passed to the assembler. Those that do not end up in the same super read are called "linking mates". The best assembly results are achieved by setting this parameter to 1 for Illumina-only assemblies. If you have more than 2x coverage by long (454, Sanger, etc) reads, set this to 0.
188
156 ]]></help> 189 ]]></help>
157 <citations> 190 <citations>
158 <citation type="doi">10.1093/bioinformatics/btt476</citation> 191 <citation type="doi">10.1093/bioinformatics/btt476</citation>
159 </citations> 192 </citations>
160 </tool> 193 </tool>