comparison methylation_analysis_bismark/methylation_analysis/bismark.xml @ 9:5b208d4d89e5 draft

Uploaded
author fcaramia
date Tue, 04 Dec 2012 20:15:26 -0500
parents d15b4a2e3bdc
children
comparison
equal deleted inserted replaced
8:4c5c3994bfcb 9:5b208d4d89e5
1 <tool id="bismark_tool" name="Bismark" version="0.7.6">
2 <description>: A bisulfite read mapper and methylation caller</description>
3 <requirements> <!-- Packages this tool declares as dependencies; the bismark version matches the tool version attribute. -->
4 <requirement type="package" version="0.1.16">samtools</requirement>
5 <requirement type="package" version="0.12.7">bowtie2</requirement> <!-- NOTE(review): 0.12.7 looks like a Bowtie 1 release number; confirm it matches the bowtie2 package definition used by this repository. -->
6 <requirement type="package" version="0.7.6">bismark</requirement>
7 </requirements>
8 <command interpreter="perl">
9
10 bismark_wrapper.pl
11
12
13 "GENOME::${genome.fields.path}"
14
15
16
17 #if str($option_input.input_option) != "mates":
18 "SINGLES::${option_input.file_single}"
19 #else
20 "MATES::${option_input.file_mate1}::${option_input.file_mate2}"
21 #end if
22
23 #if str($format_option) != "fasta":
24 "FORMAT::--fastq"
25 #else
26 "FORMAT::--fasta"
27 #end if
28
29 #if str($non_directional) == "ON":
30 "DIRECTIONAL::--non_directional"
31 #end if
32
33 "OUTPUT::${output}"
34 "SUMMARY::${summary}"
35
36 </command> <!-- The template above passes bismark_wrapper.pl a series of quoted KEY::value arguments: genome path, reads (paired or single), read format, optional non_directional flag, then the two output paths. -->
37 <inputs>
38
39 <param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> <!-- reference chosen from the bismark_indexes data table; its path field feeds the GENOME argument -->
40 <options from_data_table="bismark_indexes">
41 <filter column="2" type="sort_by"/>
42 <validator message="No indexes are available for the selected input dataset" type="no_options"/>
43 </options>
44 </param>
45
46 <param name="format_option" type="select" label="sample format"> <!-- decides which FORMAT argument the command emits -->
47 <option selected="true" value="fastq">fastq</option>
48 <option value="fasta">fasta</option>
49 </param>
50
51
52 <conditional name="option_input"> <!-- paired-end (mates) versus single-end (singles) input; selects the MATES or SINGLES argument -->
53 <param name="input_option" type="select" label="Input files">
54 <option selected="true" value="mates">mates</option>
55 <option value="singles">singles</option>
56 </param>
57 <when value="mates">
58 <param name="file_mate1" type="data" format="fasta, fastq" label="Mate 1" help=""/>
59 <param name="file_mate2" type="data" format="fasta, fastq" label="Mate 2" help=""/>
60 </when>
61 <when value="singles">
62 <param name="file_single" type="data" format="fasta, fastq" label="Single" help=""/>
63 </when>
64 </conditional>
65
66 <param name="non_directional" type="select" optional="true" label="non-directional" help=""> <!-- the command adds the DIRECTIONAL argument only when this is ON -->
67 <option selected="true" value="ON">ON</option>
68 <option value="OFF">OFF</option>
69 </param>
70
71 </inputs>
72 <outputs>
73 <data name="summary" format="txt" label="Bismark Summary" /> <!-- text summary; its path is handed to the wrapper as the SUMMARY argument -->
74 <data name="output" format="bam" label="${tool.name} on ${on_string}"> <!-- BAM result; its path is handed to the wrapper as the OUTPUT argument -->
75 <actions>
76 <action type="metadata" name="dbkey"> <!-- take the output dbkey from column 1 of the bismark_indexes row selected below -->
77 <option type="from_data_table" name="bismark_indexes" column="1" offset="0">
78 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- discard rows whose column 0 starts with "#" -->
79 <filter type="param_value" ref="genome" column="0"/> <!-- keep the row whose column 0 equals the chosen genome value -->
80 </option>
81 </action>
82 </actions>
83 </data>
84 </outputs>
85 <help>
86 |
87
88
89 **Reference**
90
91 http://www.bioinformatics.babraham.ac.uk/projects/bismark/
92
93 -----
94
95 **What it does**
96
97 Bismark takes in FastA or FastQ files and aligns the reads to a specified bisulfite genome.
98 Sequence reads are transformed into a bisulfite converted forward strand version (C->T conversion)
99 or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
100 Each of these reads is then aligned to a bisulfite treated forward strand index of a reference genome
101 (C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
102 forward strand; by doing this, alignments will produce the same positions). These 4 instances of
103 Bowtie (1 or 2) are run in parallel. The sequence file(s) are then read in again sequence by sequence
104 to pull out the original sequence from the genome and determine if there were any protected C's present or not.
105
106 As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel; the 4 strand mode can be
107 re-enabled by using --non_directional.
108
109 The final output of Bismark is in SAM format by default, but because of storage restrictions this tool's output is compressed (BAM).
110
111
112 -----
113
114 **Required Parameters**
115
116 ::
117
118 -q/--fastq The query input files (specified as mate1,mate2 or singles) are FASTQ
119 files (usually having extension .fq or .fastq). This is the default. See also
120 --solexa-quals.
121
122 -f/--fasta The query input files (specified as mate1,mate2 or singles) are FASTA
123 files (usually having extension .fa, .mfa, .fna or similar). All quality values
124 are assumed to be 40 on the Phred scale.
125
126 -1 mates1 List of files containing the #1 mates (filename usually includes
127 "_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
128 correspond file-for-file and read-for-read with those specified in mates2.
129 Reads may be a mix of different lengths. Bismark will produce one mapping result
130 and one report file per paired-end input file pair.
131
132 -2 mates2 List of files containing the #2 mates (filename usually includes
133 "_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
134 correspond file-for-file and read-for-read with those specified in mates1.
135 Reads may be a mix of different lengths.
136
137 singles List of files containing the reads to be aligned (e.g.
138 lane1.fq,lane2.fq,lane3.fq). Reads may be a mix of different lengths. Bismark will
139 produce one mapping result and one report file per input file.
140
141 --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four
142 bisulfite strands will be reported. Default: ON.
143
144 (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
145 to the original strands are merely theoretical and should not exist in reality. Specifying directional
146 alignments (Bismark's own default; set this option to OFF here) will only run 2 alignment threads to the original top (OT)
147 or bottom (OB) strands in parallel and report these alignments. This is the recommended option
148 for strand-specific libraries).
149
150 -----
151
152 **Default Parameters**
153
154 ::
155
156 --bowtie2 Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
157 alignments, i.e. searches for alignments involving all read characters (also called
158 untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
159 and/or quality trimmed where appropriate. Default: on.
160
161
162
163
164 -p NTHREADS Launch NTHREADS parallel search threads (default: 4). Threads will run on separate processors/cores
165 and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
166 parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
167 E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
168 by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
169 library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
170 automatically use the option '--reorder', which guarantees that output SAM records are printed in
171 an order corresponding to the order of the reads in the original input file, even when -p is set
172 greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
173 setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
174 if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
175 correspond to input order in that case.
176
177
178
179 </help>
180 </tool>
181
182