diff methylation_analysis/bismark.xml @ 4:282edadee017 draft

Uploaded
author fcaramia
date Mon, 03 Dec 2012 18:26:25 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/methylation_analysis/bismark.xml	Mon Dec 03 18:26:25 2012 -0500
@@ -0,0 +1,182 @@
+<tool id="bismark_tool" name="Bismark" version="0.7.6">
+  <description>: A bisulfite read mapper and methylation caller</description>
+  <requirements> <!-- packages this tool declares as dependencies, each pinned to one version string -->
+    <requirement type="package" version="0.1.16">samtools</requirement>
+    <requirement type="package" version="0.12.7">bowtie2</requirement> <!-- NOTE(review): 0.12.7 looks like a Bowtie 1 release number; Bowtie 2 releases are numbered 2.x. Confirm the intended version. -->
+    <requirement type="package" version="0.7.6">bismark</requirement>
+  </requirements>
+  <command interpreter="perl">
+    bismark_wrapper.pl
+    ## Every argument below reaches the wrapper as one quoted KEY::value token.
+
+    ## Reference index: the path field of the entry picked in the genome select.
+    "GENOME::${genome.fields.path}"
+
+    ## Paired-end input sends both mates in a single token; otherwise one file.
+    #if str($option_input.input_option) == "mates":
+      "MATES::${option_input.file_mate1}::${option_input.file_mate2}"
+    #else
+      "SINGLES::${option_input.file_single}"
+    #end if
+
+    ## Read format flag follows the sample format select.
+    #if str($format_option) == "fasta":
+      "FORMAT::--fasta"
+    #else
+      "FORMAT::--fastq"
+    #end if
+
+    ## The DIRECTIONAL token is emitted only when non-directional is ON.
+    #if str($non_directional) == "ON":
+      "DIRECTIONAL::--non_directional"
+    #end if
+
+    ## Destinations for the two datasets declared in the outputs section.
+    "OUTPUT::${output}"
+    "SUMMARY::${summary}"
+  </command>
+	<inputs>
+		<!-- Reference genome: choices come from the bismark_indexes data table, sorted on column 2. -->
+		<param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+			<options from_data_table="bismark_indexes">
+				<filter type="sort_by" column="2"/>
+				<validator type="no_options" message="No indexes are available for the selected input dataset"/>
+			</options>
+		</param>
+
+		<!-- Chooses which read-format flag (fasta or fastq) the command passes to the wrapper. -->
+		<param name="format_option" type="select" label="sample format">
+			<option value="fastq" selected="true">fastq</option>
+			<option value="fasta">fasta</option>
+		</param>
+
+		<!-- "mates" takes two datasets, "singles" takes one; format lists are comma-separated with no embedded spaces. -->
+		<conditional name="option_input">
+			<param name="input_option" type="select" label="Input files">
+				<option value="mates" selected="true">mates</option>
+				<option value="singles">singles</option>
+			</param>
+			<when value="mates">
+				<param name="file_mate1" type="data" format="fasta,fastq" label="Mate 1"/>
+				<param name="file_mate2" type="data" format="fasta,fastq" label="Mate 2"/>
+			</when>
+			<when value="singles">
+				<param name="file_single" type="data" format="fasta,fastq" label="Single"/>
+			</when>
+		</conditional>
+		<!-- The command adds the non_directional flag only when this is ON, which is the preselected value. -->
+		<param name="non_directional" type="select" label="non-directional" optional="true">
+			<option value="ON" selected="true">ON</option>
+			<option value="OFF">OFF</option>
+		</param>
+
+	</inputs>
+	<outputs> <!-- summary: plain-text report; output: BAM alignments -->
+		<data name="summary" format="txt" label="Bismark Summary" />
+		<data name="output" format="bam" label="${tool.name} on ${on_string}">
+			<actions> <!-- set dbkey from column 1 of the bismark_indexes row whose column 0 matches the selected genome; rows whose column 0 starts with # are dropped -->
+				<action type="metadata" name="dbkey">
+					<option type="from_data_table" name="bismark_indexes" column="1" offset="0">
+						<filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+						<filter type="param_value" ref="genome" column="0"/>
+					</option>
+				</action>
+			</actions>
+		</data>
+	</outputs>
+	<help>
+|
+
+
+**Reference**
+	
+  http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+  
+-----
+
+**What it does**
+
+Bismark takes in FastA or FastQ files and aligns the reads to a specified bisulfite genome. 
+Sequence reads are transformed into a bisulfite converted forward strand version (C->T conversion)
+or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
+Each of these reads is then aligned to a bisulfite treated forward strand index of a reference genome
+(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
+forward strand, by doing this alignments will produce the same positions). These 4 instances of 
+Bowtie (1 or 2) are run in parallel. The sequence file(s) are then read in again sequence by sequence 
+to pull out the original sequence from the genome and determine if there were any protected C's present or not.
+
+As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel; the 4 strand mode can be
+re-enabled by using --non_directional.
+
+The final output of Bismark is in SAM format by default, but to save storage space the output is compressed (BAM).
+
+
+-----
+ 
+**Required Parameters**
+
+::
+
+  -q/--fastq             The query input files (specified as mate1,mate2 or singles) are FASTQ
+                         files (usually having extension .fq or .fastq). This is the default. See also
+                         --solexa-quals.
+
+  -f/--fasta             The query input files (specified as mate1,mate2 or singles) are FASTA
+                         files (usually having extension .fa, .mfa, .fna or similar). All quality values
+                         are assumed to be 40 on the Phred scale.
+
+  -1 mates1              List of files containing the #1 mates (filename usually includes
+                         "_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in mates2.
+                         Reads may be a mix of different lengths. Bismark will produce one mapping result
+                         and one report file per paired-end input file pair.
+
+  -2 mates2              List of files containing the #2 mates (filename usually includes
+                         "_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
+                         correspond file-for-file and read-for-read with those specified in mates1.
+                         Reads may be a mix of different lengths.
+
+   singles               List of files containing the reads to be aligned (e.g.
+                         lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will
+                         produce one mapping result and one report file per input file.
+
+  --non_directional      The sequencing library was constructed in a non strand-specific manner, alignments to all four
+                         bisulfite strands will be reported. Default: ON.
+
+                         (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
+                         to the original strands are merely theoretical and should not exist in reality. Specifying directional
+                         alignments (Bismark's own default; set this option to OFF) will only run 2 alignment threads to the
+                         original top (OT) or bottom (OB) strands in parallel and report these alignments. This is the
+                         recommended option for strand-specific libraries).
+
+-----
+
+**Default Parameters**
+
+::
+
+  --bowtie2              Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
+                         alignments, i.e. searches for alignments involving all read characters (also called 
+                         untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
+                         and/or quality trimmed where appropriate. Default: on.
+
+
+
+
+  -p NTHREADS            Launch NTHREADS parallel search threads (default: 4). Threads will run on separate processors/cores
+                         and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
+                         parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
+                         E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
+                         by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
+                         library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
+                         automatically use the option '--reorder', which guarantees that output SAM records are printed in
+                         an order corresponding to the order of the reads in the original input file, even when -p is set
+                         greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
+                         setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
+                         if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
+                         correspond to input order in that case.
+
+
+
+	</help>
+</tool>
+
+