Mercurial > repos > fcaramia > methylation_analysis_bismark
diff methylation_analysis/bismark.xml @ 4:282edadee017 draft
Uploaded
author | fcaramia |
---|---|
date | Mon, 03 Dec 2012 18:26:25 -0500 |
parents | |
children |
line wrap: on
line diff
<tool id="bismark_tool" name="Bismark" version="0.7.6">
  <description>: A bisulfite read mapper and methylation caller</description>

  <!-- Packages, with pinned versions, that this tool declares as requirements. -->
  <requirements>
    <requirement type="package" version="0.1.16">samtools</requirement>
    <requirement type="package" version="0.12.7">bowtie2</requirement>
    <requirement type="package" version="0.7.6">bismark</requirement>
  </requirements>

  <!--
    Command template for bismark_wrapper.pl (run with the perl interpreter).
    Every argument is passed as one quoted "KEY::value" token:
    GENOME, then MATES or SINGLES, then FORMAT, then DIRECTIONAL (only when
    non_directional is "ON"), then OUTPUT and SUMMARY.
    No XML comments are placed inside the command element itself because its
    text content is the template.
  -->
  <command interpreter="perl">
    bismark_wrapper.pl

    "GENOME::${genome.fields.path}"

    #if str($option_input.input_option) == "mates":
      "MATES::$option_input.file_mate1::$option_input.file_mate2"
    #else
      "SINGLES::$option_input.file_single"
    #end if

    #if str($format_option) == "fasta":
      "FORMAT::--fasta"
    #else
      "FORMAT::--fastq"
    #end if

    #if str($non_directional) == "ON":
      "DIRECTIONAL::--non_directional"
    #end if

    "OUTPUT::$output"
    "SUMMARY::$summary"
  </command>

  <inputs>
    <!-- Reference genome, listed from the bismark_indexes data table and sorted by column 2. -->
    <param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
      <options from_data_table="bismark_indexes">
        <filter type="sort_by" column="2"/>
        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
      </options>
    </param>

    <!-- Chooses which FORMAT token the command template emits; fastq is preselected. -->
    <param name="format_option" type="select" label="sample format">
      <option value="fastq" selected="true">fastq</option>
      <option value="fasta">fasta</option>
    </param>

    <!-- Paired-end ("mates") input takes two datasets, single-end ("singles") takes one. -->
    <conditional name="option_input">
      <param name="input_option" type="select" label="Input files">
        <option value="mates" selected="true">mates</option>
        <option value="singles">singles</option>
      </param>
      <when value="mates">
        <param format="fasta,fastq" name="file_mate1" type="data" label="Mate 1" help=""/>
        <param format="fasta,fastq" name="file_mate2" type="data" label="Mate 2" help=""/>
      </when>
      <when value="singles">
        <param format="fasta,fastq" name="file_single" type="data" label="Single" help=""/>
      </when>
    </conditional>

    <!-- "ON" (preselected) adds the non_directional flag to the command; any other value omits it. -->
    <param name="non_directional" type="select" label="non-directional" help="" optional="true">
      <option value="ON" selected="true">ON</option>
      <option value="OFF">OFF</option>
    </param>
  </inputs>

  <outputs>
    <!-- Text dataset whose path is passed to the wrapper in the SUMMARY token. -->
    <data name="summary" format="txt" label="Bismark Summary"/>

    <!-- BAM dataset whose path is passed to the wrapper in the OUTPUT token. -->
    <data format="bam" name="output" label="${tool.name} on ${on_string}">
      <actions>
        <!--
          Sets the dbkey metadata from column 1 of the bismark_indexes row whose
          column 0 equals the selected genome; rows whose column 0 starts with
          "#" are excluded first.
        -->
        <action type="metadata" name="dbkey">
          <option type="from_data_table" name="bismark_indexes" column="1" offset="0">
            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
            <filter type="param_value" ref="genome" column="0"/>
          </option>
        </action>
      </actions>
    </data>
  </outputs>

  <!-- Help text is reStructuredText and therefore starts at column 0. -->
  <help>
|


**Reference**

  http://www.bioinformatics.babraham.ac.uk/projects/bismark/

-----

**What it does**

Bismark takes in FastA or FastQ files and aligns the reads to a specified bisulfite genome.
Sequence reads are transformed into a bisulfite converted forward strand version (C->T conversion)
or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
Each of these reads is then aligned to a bisulfite treated forward strand index of a reference genome
(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
forward strand; by doing this, alignments will produce the same positions). These 4 instances of
Bowtie (1 or 2) are run in parallel. The sequence file(s) are then read in again sequence by sequence
to pull out the original sequence from the genome and determine if there were any protected C's present or not.

As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel; the 4 strand mode can be
re-enabled by using --non_directional.

The final output of Bismark is in SAM format by default. For storage reasons this tool returns the output
compressed (BAM).


-----

**Required Parameters**

::

  -q/--fastq            The query input files (specified as mate1, mate2 or singles) are FASTQ
                        files (usually having extension .fq or .fastq). This is the default. See also
                        --solexa-quals.

  -f/--fasta            The query input files (specified as mate1, mate2 or singles) are FASTA
                        files (usually having extension .fa, .mfa, .fna or similar). All quality values
                        are assumed to be 40 on the Phred scale.

  -1 mates1             List of files containing the #1 mates (filename usually includes
                        "_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
                        correspond file-for-file and read-for-read with those specified in mates2.
                        Reads may be a mix of different lengths. Bismark will produce one mapping result
                        and one report file per paired-end input file pair.

  -2 mates2             List of files containing the #2 mates (filename usually includes
                        "_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
                        correspond file-for-file and read-for-read with those specified in mates1.
                        Reads may be a mix of different lengths.

  singles               List of files containing the reads to be aligned (e.g.
                        lane1.fq,lane2.fq,lane3.fq). Reads may be a mix of different lengths. Bismark will
                        produce one mapping result and one report file per input file.

  --non_directional     The sequencing library was constructed in a non strand-specific manner, alignments to all four
                        bisulfite strands will be reported. Default in this tool: ON.

                        (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
                        to the original strands are merely theoretical and should not exist in reality. Specifying directional
                        alignments (Bismark's own default; set non-directional to OFF in this tool) will only run
                        2 alignment threads to the original top (OT) or bottom (OB) strands in parallel and report
                        these alignments. This is the recommended option for strand-specific libraries.)

-----

**Default Parameters**

::

  --bowtie2             Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
                        alignments, i.e. searches for alignments involving all read characters (also called
                        untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
                        and/or quality trimmed where appropriate. Default: on.

  -p NTHREADS           Launch NTHREADS parallel search threads (default: 4). Threads will run on separate processors/cores
                        and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
                        parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
                        E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
                        by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
                        library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
                        automatically use the option '--reorder', which guarantees that output SAM records are printed in
                        an order corresponding to the order of the reads in the original input file, even when -p is set
                        greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
                        setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
                        if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
                        correspond to input order in that case.

  </help>
</tool>