diff methylation_analysis/methylation_extractor.xml @ 4:282edadee017 draft

Uploaded
author fcaramia
date Mon, 03 Dec 2012 18:26:25 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/methylation_analysis/methylation_extractor.xml	Mon Dec 03 18:26:25 2012 -0500
@@ -0,0 +1,165 @@
+<tool id="methyation_extractor_tool" name="Methylation Extractor" version="0.7.6">
+  <description>: extracts the methylation information for individual cytosine</description>
+  <requirements> <!-- External packages (name and pinned version) that this tool asks Galaxy to provide. -->
+    <requirement type="package" version="0.1.16">samtools</requirement>
+    <requirement type="package" version="0.12.7">bowtie2</requirement> <!-- NOTE(review): 0.12.7 looks like a Bowtie 1 release number; confirm the intended package and version. -->
+    <requirement type="package" version="0.7.6">bismark</requirement>
+  </requirements>
+  <command interpreter="perl">
+	## Build the argument list for the Perl wrapper. Each argument is a quoted
+	## "KEY::value" token; Cheetah strips these ## comment lines before execution.
+	methylation_extractor_wrapper.pl
+
+	## Index location read from the "path" field of the data-table row chosen in "genome".
+	"GENOME::${genome.fields.path}"
+
+	## --no_overlap is a paired-end option (it avoids scoring overlapping read_1/read_2
+	## calls twice), so emit it only when the ending is not "single".
+	#if str($no_overlap) == "ON" and str($ending) != "single":
+		"OPTION::--no_overlap"
+	#end if
+
+	#if str($ending) == "single":
+		"ENDING::-s"
+	#else
+		"ENDING::-p"
+	#end if
+
+	#if str($report) == "ON":
+		"OPTION::--report"
+	#end if
+
+	## Always passed, independent of the user's choices.
+	"OPTION::--bedGraph"
+	"OPTION::--counts"
+
+	## The two output datasets and the input BAM dataset.
+	"OUTPUT::$output"
+	"SUMMARY::$summary"
+	"BAMFILE::$bamfile"
+
+  </command>
+  <inputs>
+    <!-- Reference genome: choices come from the bismark_indexes data table, sorted on column 2. -->
+    <param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+      <options from_data_table="bismark_indexes">
+        <filter type="sort_by" column="2"/>
+        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+      </options>
+    </param>
+    <!-- Input dataset in BAM format (labelled as Bismark output). -->
+    <param name="bamfile" type="data" format="bam" label="Bam file: bismark output"/>
+    <!-- Library layout: the command template maps "single" to -s and any other value to -p. -->
+    <param name="ending" type="select" label="ending" help="" optional="true">
+      <option value="single">single-end</option>
+      <option value="paired" selected="true">paired-end</option>
+    </param>
+    <!-- ON/OFF switch read by the command template to enable the no_overlap option. -->
+    <param name="no_overlap" type="select" label="no-overlap" help="" optional="true">
+      <option value="ON" selected="true">ON</option>
+      <option value="OFF">OFF</option>
+    </param>
+    <!-- ON/OFF switch read by the command template to enable the report option. -->
+    <param name="report" type="select" label="Report" help="" optional="true">
+      <option value="ON" selected="true">ON</option>
+      <option value="OFF">OFF</option>
+    </param>
+
+  </inputs>
+  <outputs> <!-- Two datasets: the bedGraph result ("output") and a plain-text summary ("summary"). -->
+    <data name="output" format="bedgraph" label="${tool.name} on ${on_string}">
+      <actions> <!-- Set dbkey from column 1 of the bismark_indexes row whose column 0 equals the selected genome; rows starting with "#" are dropped. -->
+        <action type="metadata" name="dbkey">
+          <option type="from_data_table" name="bismark_indexes" column="1" offset="0">
+            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+            <filter type="param_value" ref="genome" column="0"/>
+          </option>
+        </action>
+      </actions>
+    </data>
+    <data name="summary" format="txt" label="${tool.name} summary"/>
+  </outputs>
+	<help>
+|
+
+**Reference**
+	
+  http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+  
+-----
+
+**What it does**
+
+
+
+The script reads in a bisulfite read alignment results file
+produced by the Bismark bisulfite mapper (BAM file) and extracts the methylation
+information for individual cytosines. This information is found in the methylation
+call field which can contain the following characters:
+
+::
+       
+ ~~~   X   for methylated C in CHG context (was protected)     ~~~
+ 
+ ~~~   x   for not methylated C in CHG context (was converted) ~~~
+ 
+ ~~~   H   for methylated C in CHH context (was protected)     ~~~
+ 
+ ~~~   h   for not methylated C in CHH context (was converted) ~~~
+ 
+ ~~~   Z   for methylated C in CpG context (was protected)     ~~~
+ 
+ ~~~   z   for not methylated C in CpG context (was converted) ~~~
+ 
+ ~~~   .   for any bases not involving cytosines               ~~~
+       
+
+
+-----
+ 
+**Required Parameters**
+
+::
+
+  -s/--single-end        Input file(s) are Bismark result file(s) generated from single-end
+                         read data. Specifying either --single-end or --paired-end is
+                         mandatory.
+
+  -p/--paired-end        Input file(s) are Bismark result file(s) generated from paired-end
+                         read data. Specifying either --paired-end or --single-end is
+                         mandatory.
+
+  --no_overlap           For paired-end reads it is theoretically possible that read_1 and
+                         read_2 overlap. This option avoids scoring overlapping methylation
+                         calls twice (only methylation calls of read 1 are used in the process
+                         since read 1 has historically higher quality basecalls than read 2).
+                         Whilst this option removes a bias towards more methylation calls
+                         in the center of sequenced fragments it may de facto remove a sizable
+                         proportion of the data. This option is highly recommended for paired-end
+                         data.
+
+  --report               Prints out a short methylation summary as well as the parameters used to run
+                         this script.
+
+
+-----
+
+**Default Parameters**
+
+::
+
+  --bedGraph             After finishing the methylation extraction, the methylation output is written into a
+                         sorted bedGraph file that reports the position of a given cytosine and its methylation 
+                         state (in %, see details below). The methylation extractor output is temporarily split up into
+                         temporary files, one per chromosome (written into the current directory or folder
+                         specified with -o/--output); these temp files are then used for sorting and deleted
+                         afterwards. By default, only cytosines in CpG context will be sorted. The option
+                         '--CX_context' may be used to report all cytosines irrespective of sequence context
+                         (this will take MUCH longer!).
+
+
+
+	</help>
+</tool>
+
+