<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool definition for the BSMAP bisulfite read mapper.

  The command section renders a single call to bsmap_wrapper.sh and passes
  every setting as a key=value argument. The reference genome comes either
  from the "bsmap_fasta" data table (built-in) or from a FASTA dataset in the
  user's history. Reads are single-end or paired-end, each with an optional
  "full parameter list" that exposes the individual BSMAP settings.
-->
<tool id="bsmap" name="BSMAP Mapper">
    <requirements>
        <requirement type="package">bsmap</requirement>
    </requirements>
    <command interpreter="bash">
        bsmap_wrapper.sh
        ##Reference genome
        #if str($refGenomeSource.genomeSource) == "history":
            ## A history reference is a plain FASTA dataset, so its own path is passed.
            ref="${refGenomeSource.myFile}"
        #else:
            ## Built-in reference: "path" field of the bsmap_fasta entry chosen in the "index" select.
            ref="${refGenomeSource.index.fields.path}"
        #end if
        ##Output files (SAM output, BSMAP summary)
        mapped=$mapped
        ##Temp directory
        tempdir=$mapped.files_path
        summary=$summary
        #if str($singlePaired.sPaired) == "single":
            library="single"
            mate1=$singlePaired.sInput1
            #if str($singlePaired.sParams.sSettingsType) == "full":
                fullparam=true
                qual=$singlePaired.sParams.qual
                threshold=$singlePaired.sParams.threshold
                lowqual=$singlePaired.sParams.lowqual
                adapter=$singlePaired.sParams.adapter
                firstn=$singlePaired.sParams.firstn
                repeat_reads=$singlePaired.sParams.repeat_reads
                seed_size=$singlePaired.sParams.seed_size
                mismatch=$singlePaired.sParams.mismatch
                equal_best=$singlePaired.sParams.equal_best
                start=$singlePaired.sParams.start
                end=$singlePaired.sParams.end
                index_interval=$singlePaired.sParams.index_interval
                seed_random=$singlePaired.sParams.seed_random
                rrbs=$singlePaired.sParams.rrbs
                mode=$singlePaired.sParams.mode
                align_info=$singlePaired.sParams.align_info
            #end if
        #else:
            library="paired"
            mate1=$singlePaired.pInput1
            mate2=$singlePaired.pInput2
            unpaired=$unpaired
            #if str($singlePaired.pParams.pSettingsType) == "full":
                fullparam=true
                qual=$singlePaired.pParams.qual
                threshold=$singlePaired.pParams.threshold
                lowqual=$singlePaired.pParams.lowqual
                adapter=$singlePaired.pParams.adapter
                firstn=$singlePaired.pParams.firstn
                repeat_reads=$singlePaired.pParams.repeat_reads
                seed_size=$singlePaired.pParams.seed_size
                mismatch=$singlePaired.pParams.mismatch
                equal_best=$singlePaired.pParams.equal_best
                start=$singlePaired.pParams.start
                end=$singlePaired.pParams.end
                index_interval=$singlePaired.pParams.index_interval
                seed_random=$singlePaired.pParams.seed_random
                rrbs=$singlePaired.pParams.rrbs
                mode=$singlePaired.pParams.mode
                align_info=$singlePaired.pParams.align_info
                maxinsert=$singlePaired.pParams.maxinsert
                mininsert=$singlePaired.pParams.mininsert
            #end if
        #end if
    </command>
    <inputs>

        <!-- Reference genome: an entry of the bsmap_fasta data table or a FASTA dataset from the history. -->
        <conditional name="refGenomeSource">
            <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
                <option value="builtin">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="builtin">
                <param name="index" type="select" label="Select a reference genome">
                    <options from_data_table="bsmap_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No reference genomes are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="myFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
            </when>
        </conditional>

        <!-- Library layout. Both branches offer the same optional full parameter list;
             the paired branch additionally exposes the insert size limits. -->
        <conditional name="singlePaired">
            <param name="sPaired" type="select" label="Is this library mate-paired?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="sInput1" type="data" format="fastq,fasta" label="FASTQ file" help="Must have ASCII encoded quality scores"/>
                <conditional name="sParams">
                    <param name="sSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
                        <option value="preSet">Commonly used</option>
                        <option value="full">Full parameter list</option>
                    </param>
                    <when value="preSet" />
                    <when value="full">
                        <param name="qual" type="select" label="Select the type of FastQ qualities">
                            <option value="33">phred33-quals</option>
                            <option value="64">phred64-quals</option>
                        </param>
                        <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
                        <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" />
                        <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" />
                        <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />

                        <param name="repeat_reads" type="select" label="How to report repeat hits">
                            <option value="0">none(unique hit only)</option>
                            <option value="1">random one</option>
                        </param>

                        <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
                        <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
                        <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
                        <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
                        <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
                        <param name="index_interval" type="integer" value="4" label="Index interval" />
                        <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
                        <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" />
                        <param name="mode" type="select" label="Set mapping strand information">
                            <option value="0">only map to 2 forward strands</option>
                            <option value="1">map SE or PE reads to all 4 strands</option>
                        </param>
                        <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." />
                    </when> <!-- full -->
                </conditional> <!-- sParams -->
            </when> <!-- single -->

            <when value="paired">
                <param name="pInput1" type="data" format="fastq,fasta" label="Forward FASTQ file" />
                <param name="pInput2" type="data" format="fastq,fasta" label="Reverse FASTQ file" />

                <conditional name="pParams">
                    <param name="pSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
                        <option value="preSet">Commonly used</option>
                        <option value="full">Full parameter list</option>
                    </param>
                    <when value="preSet" />
                    <when value="full">
                        <param name="qual" type="select" label="Select the type of FastQ qualities">
                            <option value="33">phred33-quals</option>
                            <option value="64">phred64-quals</option>
                        </param>

                        <param name="mininsert" type="integer" value="28" label="Minimal insert size allowed" />
                        <param name="maxinsert" type="integer" value="500" label="Maximal insert size allowed" />

                        <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
                        <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" />
                        <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" />
                        <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />

                        <param name="repeat_reads" type="select" label="How to report repeat hits">
                            <option value="0">none(unique hit only)</option>
                            <option value="1">random one</option>
                        </param>

                        <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
                        <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
                        <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
                        <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
                        <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
                        <param name="index_interval" type="integer" value="4" label="Index interval" />
                        <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
                        <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" />
                        <param name="mode" type="select" label="Set mapping strand information">
                            <option value="0">only map to 2 forward strands</option>
                            <option value="1">map SE or PE reads to all 4 strands</option>
                        </param>
                        <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." />
                    </when> <!-- full -->
                </conditional> <!-- pParams -->
            </when> <!-- paired -->
        </conditional> <!-- singlePaired -->

    </inputs>
    <outputs>
        <data name="mapped" format="sam" label="BSMAP Mapped Reads">
            <!-- Set the dbkey of the SAM output from whichever reference was used. -->
            <actions>
                <conditional name="refGenomeSource.genomeSource">
                    <when value="builtin">
                        <!-- Look up the data table row whose column 0 matches the selected
                             "index" value and take the dbkey from column 1. -->
                        <action type="metadata" name="dbkey">
                            <option type="from_data_table" name="bsmap_fasta" column="1" offset="0">
                                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                                <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                            </option>
                        </action>
                    </when>
                    <when value="history">
                        <!-- A history reference carries its own dbkey. -->
                        <action type="metadata" name="dbkey">
                            <option type="from_param" name="refGenomeSource.myFile" param_attribute="dbkey"/>
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <data name="summary" format="txt" label="BSMAP Mapping Summary" />
        <!-- Only created for paired-end runs. -->
        <data name="unpaired" format="sam" label="BSMAP Unpaired Hits">
            <filter>(singlePaired['sPaired'] == 'paired')</filter>
        </data>

    </outputs>
    <help>
**What it does**

BSMAP_ is a short reads mapping software for bisulfite sequencing reads. It has the following features:

- read length up to 144 nt, allow up to 15 mismatches, gap size up to 3 bp.

- support single end and pair end mapping. support multi-thread mapping.

- support both "Lister protocol" (sequence 2 forward strands only) and "Cokus protocol" (sequence all 4 bisulfite converted strands)

- reads are directly mapped to original reference genome sequence, no need to preprocess the reads and reference genome to convert C to T.

- support both whole genome bisulfite sequencing (WGBS) mode and reduced representation bisulfite sequencing (RRBS) mode, allow changing the digestion site information to support different digestion enzymes for RRBS.

- allow trimming adapter sequences and low quality nucleotides from the 3'end of reads

- allow trade off between speed/memory usage/mapping sensitivity. For human genome, the RRBS mode uses ~3GB. In WGBS mode, the typical memory usage is ~9GB, but can be as low as 5GB.

- allow alignment for other nucleotide transitions, for example, can be set to detect the A=>I(G) transition in RNA editing.

.. _BSMAP: http://code.google.com/p/bsmap/

**Input formats**

BSMAP accepts files in FASTA/FASTQ format.

**Outputs**

The output contains the following files:

- mapped reads in SAM format

- mapping summary

- unpaired hits (only for paired-end mapping)

    </help>

    <!-- No functional tests are defined for this tool. -->
    <tests>
    </tests>
</tool>