changeset 5:ee5badb527cd draft default tip

Uploaded
author charles-bernard
date Wed, 16 Nov 2016 06:38:59 -0500
parents e989fc4b1b76
children
files cytosine_report_to_bedgraph/.shed.yml cytosine_report_to_bedgraph/bismark2bedgraph.awk cytosine_report_to_bedgraph/bismark2bedgraph.sh cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py
diffstat 4 files changed, 33 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/cytosine_report_to_bedgraph/.shed.yml	Mon Nov 14 05:02:15 2016 -0500
+++ b/cytosine_report_to_bedgraph/.shed.yml	Wed Nov 16 06:38:59 2016 -0500
@@ -9,6 +9,7 @@
 
     The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (Integrative Genomics Viewer).
     In this respect, the tool can optionally generate a tdf binary file (Tiled Data Format) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph.
-name: bismark
+name: cytosine_report_to_bedgraph
+remote_repository_url: https://github.com/charles-bernard/Galaxy_tools/tree/master/cytosine_report_to_bedgraph
 owner: charles-bernard
 type: unrestricted
\ No newline at end of file
--- a/cytosine_report_to_bedgraph/bismark2bedgraph.awk	Mon Nov 14 05:02:15 2016 -0500
+++ b/cytosine_report_to_bedgraph/bismark2bedgraph.awk	Wed Nov 16 06:38:59 2016 -0500
@@ -1,11 +1,14 @@
 #!/usr/bin/awk
 
+#USAGE:
+#awk -v context=<list_of_contexts> -v coverage=<boolean> -f <script_path>/bismark2bedgraph.awk <cytosine_report_name> >> <bedgraph_name>
+
 BEGIN {
 	FS = "\t";
 }
 
 {
-	if ( $6 ~ context && ($4 > 0 || $5 > 0) ) { 
+	if ( $6 ~ context && ( $4 > 0 || $5 > 0 ) ) { 
 
 		chr_name = $1;
 		chr_pos = $2;
@@ -17,13 +20,13 @@
 			nb_reads = c_meth_count + c_unmeth_count
 			printf("%s\t%s\t%s\t%s\n", chr_name, chr_pos, chr_pos, nb_reads)
 		} else {
-			if ( strand == "-") { 
+			if ( strand == "-" ) { 
 				s = "-"; 
 			} else { 
 				s = "";
 			}
-			meth_ratio = c_meth_count / (c_meth_count + c_unmeth_count); 
-			printf("%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio)	
+			meth_ratio = c_meth_count / ( c_meth_count + c_unmeth_count ); 
+			printf( "%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio )	
 		}
 	}
-}
\ No newline at end of file
+}
--- a/cytosine_report_to_bedgraph/bismark2bedgraph.sh	Mon Nov 14 05:02:15 2016 -0500
+++ b/cytosine_report_to_bedgraph/bismark2bedgraph.sh	Wed Nov 16 06:38:59 2016 -0500
@@ -8,27 +8,34 @@
 while true ; do
 	case "$1" in
 		-e | --epi )
+			#epi is the prefix of the output file names
 			case "$2" in
 				"" ) epi="current_job"; shift 2 ;;
 				*) epi=$2; shift 2 ;;
 			esac ;;
 		-i | --infile_cov )
+			#infile_cov is the filename of the cytosine report taken as input
 			case "$2" in
 				*) infile_cov=$2; shift 2 ;;
 			esac ;;
 		-c | --context ) 
+			#context defines whether 2 or 4 bedgraphs are returned
 			context=true; shift ;;
 		--tdf )
+			#tdf defines whether or not bedgraphs have to be converted into tdf files.
 			tdf=true; shift ;;
 		--igv_genome )
+			#tdf conversion is achieved by igvtools and requires a file with the chrs_len of the genome
 			case "$2" in
 				*) igv_genome=$2; shift 2 ;;
 			esac ;;
 		-o | --output_dir )
+			#output_dir in this galaxy tool is the tmp dir created by the wrapper.py
 			case "$2" in
 				*) output_dir=$2; shift 2 ;;
 			esac ;;
 		--tool_dir )
+			#tool_dir is required to call other scripts stored in this directory
 			case "$2" in
 				*) tool_dir=$2; shift 2 ;;
 			esac ;;
@@ -37,31 +44,26 @@
 	esac
 done
 
-# do something with the variables -- in this case the lamest possible one :-)
-echo "epi = $epi"
-echo "infile_cov = $infile_cov"
-echo "output_dir = $output_dir"
-echo "context = $context"
-echo "tool_dir = $tool_dir"
-echo "tdf = $tdf"
-echo "igv_genome = $igv_genome"
-
 #IGV_path
-IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin"
+#IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin"
 
 # define outputs according to options
 if [[ "$context" = true ]]; then
 	context_list=("CG" "CHG" "CHH")
+	n="4"
 	output_types=("CG" "CHG" "CHH" "coverage")
 	bedgraph_list=("$output_dir""/""$epi""_CpG.bedgraph" "$output_dir""/""$epi""_CHG.bedgraph" "$output_dir""/""$epi""_CHH.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph")
-	tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf")
-	n="4"
+	if [[ "$tdf" = true ]]; then
+		tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf")
+	fi
 else
 	context_list=(".*")
+	n="2"
 	output_types=("CXX" "coverage")
 	bedgraph_list=("$output_dir""/""$epi""_CXX.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph")
-	tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf")
-	n="2"
+	if [[ "$tdf" = true ]]; then
+		tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf")
+	fi
 fi
 
 # process
@@ -69,18 +71,16 @@
 	printf "________________________________________________________________________\n"
 	printf "Processing %s\n" ${output_types[$i]}
 	printf "... Converting Cytosine Report to Bedgraph\n" 
-	if (( i < n - 1 )); then
-		#if not coverage:
-		#printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}"
+	if (( i < n - 1 )); then #if not coverage:
 		printf "#<Chr>\t<Start>\t<End>\t<Strand;Meth_ratio>\n" > "${bedgraph_list[$i]}"
 		awk -v context="${context_list[$i]}" -v coverage="false" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}"
 	else
-		#printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}"
 		printf "#<Chr>\t<Start>\t<End>\t<Coverage>\n" > "${bedgraph_list[$i]}"
 		awk -v context="${context_list[$i]}" -v coverage="true" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}"
 	fi
 	if [[ "$tdf" = true ]]; then
 		printf "... Converting Bedgraph to Tdf\n"
-		"$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file  
+		#"$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file  
+		igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file 
 	fi
 done
\ No newline at end of file
--- a/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml	Mon Nov 14 05:02:15 2016 -0500
+++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml	Wed Nov 16 06:38:59 2016 -0500
@@ -90,7 +90,7 @@
     </data>
   </outputs>
 
-  <tests></tests>
+  <tests><!--TO DO --></tests>
 
   <help>
 <![CDATA[
@@ -98,7 +98,7 @@
 **What it does**
 
   | This tool takes as input a genome-wide cytosine methylation report (generated by the tool *Bismark Meth. Extractor*) and converts it into a bedGraph for each cytosine context (CpG, CHG and CHH). 
-  | These bedGraphs display, for a given context, the ratio of methylation of each covered cytosine in the genome.
+  | These bedGraphs display, for any given context, the ratio of methylation of each covered cytosine in the genome.
   |
   | It also produces a bedGraph displaying the coverage count of each cytosine in the genome (non-covered cytosine are ignored).
   | 
@@ -106,7 +106,7 @@
   .. class:: infomark
 
   | The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (*Integrative Genomics Viewer*).
-  | In this respect, the tool can optionally generate a tdf binary file (*Tiled Data Format*) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph.
+  | In this respect, the tool can optionally generate a tdf binary file (*Tiled Data Format*) from each converted bedGraph; the tdf format is indeed better handled by IGV than bedGraph.
 ]]>
   </help>
-</tool>
\ No newline at end of file
+</tool>