view methylation_analysis/bismark.xml @ 6:4f09ae8138d1 draft

Uploaded
author fcaramia
date Mon, 03 Dec 2012 18:27:21 -0500
parents 282edadee017
children
line wrap: on
line source

<tool id="bismark_tool" name="Bismark" version="0.7.6">
  <description>: A bisulfite read mapper and methylation caller</description>
  <!-- External packages this tool declares as requirements, each pinned to a version. -->
  <requirements>
    <requirement version="0.1.16" type="package">samtools</requirement>
    <requirement version="0.12.7" type="package">bowtie2</requirement>
    <requirement version="0.7.6" type="package">bismark</requirement>
  </requirements>
  <command interpreter="perl">
	## Each argument handed to bismark_wrapper.pl is one quoted KEY::value token.
	bismark_wrapper.pl

	## Path field of the data table entry picked in the genome select.
	"GENOME::${genome.fields.path}"

	## Paired input sends both mate files in a single token, otherwise one file is sent.
	#if str($option_input.input_option) == "mates":
		"MATES::${option_input.file_mate1}::${option_input.file_mate2}"
	#else
		"SINGLES::${option_input.file_single}"
	#end if

	## Read format flag: anything other than fasta falls back to the fastq flag.
	#if str($format_option) == "fasta":
		"FORMAT::--fasta"
	#else
		"FORMAT::--fastq"
	#end if

	## The DIRECTIONAL token is emitted only when the non_directional select is ON.
	#if str($non_directional) == "ON":
		"DIRECTIONAL::--non_directional"
	#end if

	## Destinations for the two declared outputs.
	"OUTPUT::${output}"
	"SUMMARY::${summary}"
  </command>
	<inputs>

		<!-- Reference genome: choices come from the bismark_indexes data table, sorted on column 2. -->
		<param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
			<options from_data_table="bismark_indexes">
				<filter type="sort_by" column="2"/>
				<validator type="no_options" message="No indexes are available for the selected input dataset"/>
			</options>
		</param>

		<!-- Read format chosen by the user; the command template turns it into the fasta or fastq flag. -->
		<param name="format_option" type="select" label="sample format">
			<option value="fastq" selected="true">fastq</option>
			<option value="fasta">fasta</option>
		</param>

		<!-- Paired ("mates") versus single-end input; each case exposes its own dataset parameters. -->
		<conditional name="option_input">
			<param name="input_option" type="select" label="Input files">
				<option value="mates" selected="true">mates</option>
				<option value="singles">singles</option>
			</param>
			<when value="mates">
				<!-- Extension lists are written without embedded whitespace. -->
				<param format="fasta,fastq" name="file_mate1" type="data" label="Mate 1" help=""/>
				<param format="fasta,fastq" name="file_mate2" type="data" label="Mate 2" help=""/>
			</when>
			<when value="singles">
				<param format="fasta,fastq" name="file_single" type="data" label="Single" help=""/>
			</when>
		</conditional>

		<!-- Only the value ON makes the command template add the non_directional flag. -->
		<param name="non_directional" type="select" label="non-directional" help="" optional="true">
			<option value="ON" selected="true">ON</option>
			<option value="OFF">OFF</option>
		</param>

	</inputs>
	<outputs>
		<!-- Text report; its path is handed to the wrapper as the SUMMARY argument. -->
		<data name="summary" format="txt" label="Bismark Summary" />
		<!-- BAM result; its path is handed to the wrapper as the OUTPUT argument. -->
		<data format="bam" name="output" label="${tool.name} on ${on_string}">
			<actions>
				<!-- Set dbkey from column 1 of the bismark_indexes row whose column 0 equals the
				     selected genome value; rows whose column 0 starts with "#" are discarded first. -->
				<action type="metadata" name="dbkey">
					<option type="from_data_table" name="bismark_indexes" column="1" offset="0">
						<filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
						<filter type="param_value" ref="genome" column="0"/>
					</option>
				</action>
			</actions>
		</data>
	</outputs>
	<help>
|


**Reference**
	
  http://www.bioinformatics.babraham.ac.uk/projects/bismark/
  
-----

**What it does**

Bismark takes in FastA or FastQ files and aligns the reads to a specified bisulfite genome. 
Sequence reads are transformed into a bisulfite converted forward strand version (C->T conversion)
or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
Each of these reads is then aligned to a bisulfite treated forward strand index of a reference genome
(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
forward strand; by doing this, alignments will produce the same positions). These 4 instances of 
Bowtie (1 or 2) are run in parallel. The sequence file(s) are then read in again sequence by sequence 
to pull out the original sequence from the genome and determine if there were any protected C's present or not.

As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be
re-enabled by using --non_directional.

Bismark writes its final output in SAM format by default; to save storage space, this tool provides the output compressed as BAM.


-----
 
**Required Parameters**

::

  -q/--fastq             The query input files (specified as mate1,mate2 or singles) are FASTQ
                         files (usually having extension .fq or .fastq). This is the default. See also
                         --solexa-quals.

  -f/--fasta             The query input files (specified as mate1,mate2 or singles) are FASTA
                         files (usually having extension .fa, .mfa, .fna or similar). All quality values
                         are assumed to be 40 on the Phred scale.

  -1 mates1              List of files containing the #1 mates (filename usually includes
                         "_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
                         correspond file-for-file and read-for-read with those specified in mates2.
                         Reads may be a mix of different lengths. Bismark will produce one mapping result
                         and one report file per paired-end input file pair.

  -2 mates2              List of files containing the #2 mates (filename usually includes
                         "_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
                         correspond file-for-file and read-for-read with those specified in mates1.
                         Reads may be a mix of different lengths.

   singles               List of files containing the reads to be aligned (e.g.
                         lane1.fq,lane2.fq,lane3.fq). Reads may be a mix of different lengths. Bismark will
                         produce one mapping result and one report file per input file.

  --non_directional      The sequencing library was constructed in a non strand-specific manner, alignments to all four
                         bisulfite strands will be reported. Default: ON.

                         (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
                         to the original strands are merely theoretical and should not exist in reality. Specifying directional
                         alignments (Bismark's own default; in this tool, set non-directional to OFF) will only run
                         2 alignment threads to the original top (OT) or bottom (OB) strands in parallel and report
                         these alignments. This is the recommended option for strand-specific libraries).

-----

**Default Parameters**

::

  --bowtie2              Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
                         alignments, i.e. searches for alignments involving all read characters (also called 
                         untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
                         and/or quality trimmed where appropriate. Default: on.




  -p NTHREADS            Launch NTHREADS parallel search threads (default: 4). Threads will run on separate processors/cores
                         and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
                         parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
                         E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
                         by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
                         library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
                         automatically use the option '--reorder', which guarantees that output SAM records are printed in
                         an order corresponding to the order of the reads in the original input file, even when -p is set
                         greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
                         setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
                         if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
                         correspond to input order in that case.



	</help>
</tool>