Mercurial > repos > bgruening > bismark

diff bismark_methylation_extractor @ 0:62c6da72dd4a draft
Uploaded
author: bgruening
date: Sat, 06 Jul 2013 09:57:36 -0400
children: 91f07ff056ca
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark_methylation_extractor	Sat Jul 06 09:57:36 2013 -0400
@@ -0,0 +1,4005 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+$|++;
+use Getopt::Long;
+use Cwd;
+use Carp;
+use FindBin qw($Bin);
+use lib "$Bin/../lib";
+
+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+my @filenames; # input files
+my %counting;
+my $parent_dir = getcwd();
+
+my %fhs;
+
+my $version = 'v0.7.11';
+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip) = process_commandline();
+
+
+### only needed for bedGraph output
+my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files
+my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total
+my @bedfiles;
+
+### only needed for genome-wide cytosine methylation report
+my %chromosomes;
+
+##############################################################################################
+### Summarising Run Parameters
+##############################################################################################
+
+### METHYLATION EXTRACTOR
+
+warn "Summarising Bismark methylation extractor parameters:\n";
+warn '='x63,"\n";
+
+if ($single){
+  if ($vanilla){
+    warn "Bismark single-end vanilla format specified\n";
+  }
+  else{
+    warn "Bismark single-end SAM format specified (default)\n"; # default
+  }
+}
+elsif ($paired){
+  if ($vanilla){
+    warn "Bismark paired-end vanilla format specified\n";
+  }
+  else{
+    warn "Bismark paired-end SAM format specified (default)\n"; # default
+  }
+}
+
+if ($ignore){
+  warn "First $ignore bases will be disregarded when processing the methylation call string\n";
+}
+
+if ($full){
+  warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n";
+}
+if ($merge_non_CpG){
+  warn "Merge CHG and CHH context to non-CpG context specified\n";
+}
+### output directory
+if ($output_dir eq ''){
+  warn "Output will be written to the current directory ('$parent_dir')\n";
+}
+else{
+  warn "Output path specified as: $output_dir\n";
+}
+
+
+sleep (1);
+
+### BEDGRAPH
+
+if ($bedGraph){
+  warn "\n\nSummarising bedGraph parameters:\n";
+  warn '='x63,"\n";
+
+  if ($counts){
+    warn "Generating additional output in bedGraph format including methylating counts (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>)\n";
+  }
+  else{
+    warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\n";
+  }
+
+  warn "Using a cutoff of $coverage_threshold read(s) to report cytosine positions\n";
+
+  if ($CX_context){
+    warn "Reporting and sorting methylation information for all cytosine context (sorting may take a long time, you have been warned ...)\n";
+  }
+  else{ # default
+    $CpG_only = 1;
+    warn "Reporting and sorting cytosine methylation information in CpG context only (default)\n";
+  }
+
+  if ($remove){
+    warn "White spaces in read ID names will be removed prior to sorting\n";
+  }
+
+  if (defined $sort_size){
+    warn "The bedGraph UNIX sort command will use the following memory setting:\t'$sort_size'. Temporary directory used for sorting is the output directory\n";
+  }
+  else{
+    warn "Setting a default memory usage for the bedGraph UNIX sort command to 2GB\n";
+  }
+
+
+
+  sleep (1);
+
+  if ($cytosine_report){
+    warn "\n\nSummarising genome-wide cytosine methylation report parameters:\n";
+    warn '='x63,"\n";
+    warn "Generating comprehensive genome-wide cytosine report\n(output format: <Chromosome> <Position> <Strand> <count methylated> <count non-methylated>  <C-context>  <trinucleotide context> )\n";
+
+
+    if ($CX_context){
+      warn "Reporting methylation for all cytosine contexts. Be aware that this will generate enormous files\n";
+    }
+    else{ # default
+      $CpG_only = 1;
+      warn "Reporting cytosine methylation in CpG context only (default)\n";
+    }
+
+    if ($split_by_chromosome){
+      warn "Splitting the cytosine report output up into individual files for each chromosome\n";
+    }
+
+    ### Zero-based coordinates
+    if ($zero){
+      warn "Using zero-based genomic coordinates (user-defined)\n";
+    }
+    else{ # default, 1-based coords
+      warn "Using 1-based genomic coordinates (default)\n";
+    }
+
+    ### GENOME folder
+    if ($genome_folder){
+      unless ($genome_folder =~/\/$/){
+	$genome_folder =~ s/$/\//;
+      }
+      warn "Genome folder was specified as $genome_folder\n";
+    }
+    else{
+      $genome_folder  = '/data/public/Genomes/Mouse/NCBIM37/';
+      warn "Using the default genome folder /data/public/Genomes/Mouse/NCBIM37/\n";
+    }
+    sleep (1);
+  }
+}
+
+warn "\n";
+sleep (5);
+
+######################################################
+### PROCESSING FILES
+######################################################
+
+foreach my $filename (@filenames){
+  # resetting counters and filehandles
+  %fhs = ();
+  %counting =(
+	      total_meCHG_count => 0,
+	      total_meCHH_count => 0,
+	      total_meCpG_count => 0,
+	      total_unmethylated_CHG_count => 0,
+	      total_unmethylated_CHH_count => 0,
+	      total_unmethylated_CpG_count => 0,
+	      sequences_count => 0,
+	     );
+  @sorting_files = ();
+  @bedfiles = ();
+
+  process_Bismark_results_file($filename);
+
+  ### Closing all filehandles so that the Bismark methylation extractor output doesn't get truncated due to buffering issues
+  foreach my $fh (keys %fhs) {
+    if ($fh =~ /^[1230]$/) {
+      foreach my $context (keys %{$fhs{$fh}}) {
+	close $fhs{$fh}->{$context} or die $!;
+
+      }
+    } else {
+      close $fhs{$fh} or die $!;
+    }
+  }
+
+  if ($bedGraph){
+
+    my $out = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
+    $out =~ s/gz$//;
+    $out =~ s/sam$//;
+    $out =~ s/bam$//;
+    $out =~ s/txt$//;
+    $out =~ s/$/bedGraph/;
+
+
+
+    my $bedGraph_output = $out;
+    my @args;
+
+    if ($remove){
+      push @args, '--remove';
+    }
+    if ($CX_context){
+      push @args, '--CX_context';
+    }
+    if ($no_header){
+      push @args, '--no_header';
+    }
+    if ($counts){
+      push @args, "--counts";
+    }
+
+    push @args, "--buffer_size $sort_size";
+    push @args, "--cutoff $coverage_threshold";
+    push @args, "--output $bedGraph_output";
+    push @args, "--dir '$output_dir'";
+
+    ### adding all files to be sorted to @args
+    foreach my $f (@sorting_files){
+      push @args, $f;
+    }
+
+    #  print join "\t",@args,"\n";
+
+    system ("$Bin/bismark2bedGraph @args");
+
+    warn "Finished BedGraph conversion ...\n\n";
+    sleep(3);
+
+    # open (OUT,'>',$output_dir.$bedGraph_output) or die "Problems with the bedGraph output filename detected: file path: '$output_dir'\tfile name: '$bedGraph_output' $!";
+    # warn "Writing bedGraph to file: $bedGraph_output\n";
+    # process_bedGraph_output();
+    # close OUT or die $!;
+
+    ### genome-wide cytosine methylation report requires bedGraph processing anyway
+    if ($cytosine_report){
+      @args = (); # resetting @args
+      my $cytosine_out = $out;
+      $cytosine_out =~ s/bedGraph$//;
+
+      if ($CX_context){
+	$cytosine_out =~ s/$/CX_report.txt/;
+      }
+      else{
+	$cytosine_out =~ s/$/CpG_report.txt/;
+      }
+
+      push @args, "--output $cytosine_out";
+      push @args, "--dir '$output_dir'";
+      push @args, "--genome '$genome_folder'";
+      push @args, "--parent_dir '$parent_dir'";
+
+      if ($zero){
+	push @args, "--zero";
+      }
+      if ($CX_context){
+	push @args, '--CX_context';
+      }
+      if ($split_by_chromosome){
+	push @args, '--split_by_chromosome';
+      }
+
+      push @args, $bedGraph_output; # this will be the infile
+
+      system ("$Bin/bedGraph2cytosine @args");
+      # generate_genome_wide_cytosine_report($bedGraph_output,$cytosine_out);
+      warn "\n\nFinished generating genome-wide cytosine report\n\n";
+    }
+  }
+}
+
+
+sub process_commandline{
+  my $help;
+  my $single_end;
+  my $paired_end;
+  my $ignore;
+  my $genomic_fasta;
+  my $full;
+  my $report;
+  my $extractor_version;
+  my $no_overlap;
+  my $merge_non_CpG;
+  my $vanilla;
+  my $output_dir;
+  my $no_header;
+  my $bedGraph;
+  my $coverage_threshold = 1; # Minimum number of reads covering before calling methylation status
+  my $remove;
+  my $counts;
+  my $cytosine_report;
+  my $genome_folder;
+  my $zero;
+  my $CpG_only;
+  my $CX_context;
+  my $split_by_chromosome;
+  my $sort_size;
+  my $samtools_path;
+  my $gzip;
+
+  my $command_line = GetOptions ('help|man' => \$help,
+				 'p|paired-end' => \$paired_end,
+				 's|single-end' => \$single_end,
+				 'fasta' => \$genomic_fasta,
+				 'ignore=i' => \$ignore,
+				 'comprehensive' => \$full,
+				 'report' => \$report,
+				 'version' => \$extractor_version,
+				 'no_overlap' => \$no_overlap,
+				 'merge_non_CpG' => \$merge_non_CpG, 
+				 'vanilla' => \$vanilla,
+				 'o|output=s' => \$output_dir,
+				 'no_header' => \$no_header,
+				 'bedGraph' => \$bedGraph,
+				 "cutoff=i" => \$coverage_threshold,
+				 "remove_spaces" => \$remove,
+				 "counts" => \$counts,
+				 "cytosine_report" => \$cytosine_report,
+				 'g|genome_folder=s' => \$genome_folder,
+				 "zero_based" => \$zero,	
+				 "CX|CX_context" => \$CX_context,
+				 "split_by_chromosome" => \$split_by_chromosome,
+				 "buffer_size=s" => \$sort_size,
+				 'samtools_path=s' => \$samtools_path,
+				 "gzip" => \$gzip,
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+
+  if ($extractor_version){
+    print << "VERSION";
+
+
+                           Bismark Methylation Extractor
+
+                      Bismark Extractor Version: $version
+              Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
+                www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+
+VERSION
+    exit;
+  }
+
+
+  ### no files provided
+  unless (@ARGV){
+    die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
+  }
+  @filenames = @ARGV;
+
+  warn "\n *** Bismark methylation extractor version $version ***\n\n";
+
+  ### IGNORING <INT> bases at the start of the read when processing the methylation call string
+  unless ($ignore){
+    $ignore = 0;
+  }
+  ### PRINT A REPORT
+  unless ($report){
+    $report = 0;
+  }
+
+  ### OUTPUT DIR PATH
+  if ($output_dir){
+    unless ($output_dir =~ /\/$/){
+      $output_dir =~ s/$/\//;
+    }
+  }
+  else{
+    $output_dir = '';
+  }
+
+  ### NO HEADER
+  unless ($no_header){
+    $no_header = 0;
+  }
+
+  ### OLD (VANILLA) OUTPUT FORMAT
+  unless ($vanilla){
+    $vanilla = 0;
+  }
+
+  if ($single_end){
+    $paired_end = 0;   ### SINGLE END ALIGNMENTS
+  }
+  elsif ($paired_end){
+    $single_end = 0;   ### PAIRED-END ALIGNMENTS
+  }
+  else{
+    die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
+  }
+
+  ### NO OVERLAP
+  if ($no_overlap){
+    die "The option '--no_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
+  }
+  else{
+    $no_overlap = 0;
+  }
+
+  ### COMPREHENSIVE OUTPUT
+  unless ($full){
+    $full = 0;
+  }
+
+  ### MERGE NON-CpG context
+  unless ($merge_non_CpG){
+    $merge_non_CpG = 0;
+  }
+
+  ### remove white spaces in read ID (needed for sorting using the sort command
+  unless ($remove){
+    $remove = 0;
+  }
+
+  ### COVERAGE THRESHOLD FOR bedGraph OUTPUT
+  if (defined $coverage_threshold){
+    unless ($coverage_threshold > 0){
+      die "Please select a coverage greater than 0 (positive integers only)\n";
+    }
+  }
+  else{
+    $coverage_threshold = 1;
+  }
+
+  ### SORT buffer size
+  if (defined $sort_size){
+    unless ($sort_size =~ /^\d+\%$/ or $sort_size =~ /^\d+(K|M|G|T)$/){
+      die "Please select a buffer size as percentage (e.g. --buffer_size 20%) or a number to be multiplied with K, M, G, T etc. (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line\n";
+    }
+  }
+  else{
+    $sort_size = '2G';
+  }
+
+  if ($zero){
+    die "Option '--zero' is only available if  '--cytosine_report' is specified as well. Please respecify\n" unless ($cytosine_report);
+  }
+
+  if ($CX_context){
+    die "Option '--CX_context' is only available if  '--cytosine_report' or '--bedGraph' is specified as well. Please respecify\n" unless ($cytosine_report or $bedGraph);
+  }
+  else{
+    $CX_context = 0;
+  }
+
+  unless ($counts){
+    $counts = 0;
+  }
+
+  if ($cytosine_report){
+
+    ### GENOME folder
+    if ($genome_folder){
+      unless ($genome_folder =~/\/$/){
+	$genome_folder =~ s/$/\//;
+      }
+    }
+    else{
+      die "Please specify a genome folder to proceed (full path only)\n";
+    }
+
+    unless ($bedGraph){
+      warn "Setting the option '--bedGraph' since this is required for the genome-wide cytosine report\n";
+      $bedGraph = 1;
+    }
+    unless ($counts){
+      warn "Setting the option '--counts' since this is required for the genome-wide cytosine report\n";
+      $counts = 1;
+    }
+    warn "\n";
+  }
+
+  ### PATH TO SAMTOOLS
+  if (defined $samtools_path){
+    # if Samtools was specified as full command
+    if ($samtools_path =~ /samtools$/){
+      if (-e $samtools_path){
+	# Samtools executable found
+      }
+      else{
+	die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+      }
+    }
+    else{
+      unless ($samtools_path =~ /\/$/){
+	$samtools_path =~ s/$/\//;
+      }
+      $samtools_path .= 'samtools';
+      if (-e $samtools_path){
+	# Samtools executable found
+      }
+      else{
+	die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+      }
+    }
+  }
+  # Check whether Samtools is in the PATH if no path was supplied by the user
+  else{
+    if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if Samtools is in the PATH
+      $samtools_path = `which samtools`;
+      chomp $samtools_path;
+    }
+  }
+
+  unless (defined $samtools_path){
+    $samtools_path = '';
+  }
+
+  return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip);
+}
+
+
+sub process_Bismark_results_file{
+  my $filename = shift;
+
+  warn "\nNow reading in Bismark result file $filename\n\n";
+
+  if ($filename =~ /\.gz$/) {
+    open (IN,"zcat $filename |") or die "Can't open gzipped file $filename: $!\n";
+  }
+  elsif ($filename =~ /bam$/) {
+    if ($samtools_path){
+      open (IN,"$samtools_path view -h $filename |") or die "Can't open BAM file $filename: $!\n";
+    }
+    else{
+      die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
+    }
+  }
+  else {
+    open (IN,$filename) or die "Can't open file $filename: $!\n";
+  }
+
+  ### Vanilla and SAM output need to read different numbers of header lines
+  if ($vanilla) {
+    my $bismark_version = <IN>; ## discarding the Bismark version info
+    chomp $bismark_version;
+    $bismark_version =~ s/\r//; # replaces \r line feed
+    $bismark_version =~  s/Bismark version: //;
+    if ($bismark_version =~ /^\@/) {
+      warn "Detected \@ as the first character of the version information. Is it possible that the file is in SAM format?\n\n";
+      sleep (2);
+    }
+
+    unless ($version eq $bismark_version){
+      die "The methylation extractor and Bismark itself need to be of the same version!\n\nVersions used:\nmethylation extractor: '$version'\nBismark: '$bismark_version'\n";
+    }
+  } else {
+    # If the read is in SAM format (default) it can either start with @ header lines or start with alignments directly.
+    # We are reading from it further down
+  }
+
+  my $output_filename = (split (/\//,$filename))[-1];
+
+  ### OPENING OUTPUT-FILEHANDLES
+  if ($report) {
+    my $report_filename = $output_filename;
+    $report_filename =~ s/\.sam$//;
+    $report_filename =~ s/\.txt$//;
+    $report_filename =~ s/$/_splitting_report.txt/;
+    $report_filename = $output_dir . $report_filename;
+    open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
+  }
+
+  if ($report) {
+    print REPORT "$output_filename\n\n";
+    print REPORT "Parameters used to extract methylation information:\n";
+    if ($paired) {
+      if ($vanilla) {
+	print REPORT "Bismark result file: paired-end (vanilla Bismark format)\n";
+      } else {
+	print REPORT "Bismark result file: paired-end (SAM format)\n"; # default
+      }
+    }
+
+    if ($single) {
+      if ($vanilla) {
+	print REPORT "Bismark result file: single-end (vanilla Bismark format)\n";
+      } else {
+	print REPORT "Bismark result file: single-end (SAM format)\n"; # default
+      }
+    }
+
+    if ($ignore) {
+      print REPORT "Ignoring first $ignore bases\n";
+    }
+
+    if ($full) {
+      print REPORT "Output specified: comprehensive\n";
+    } else {
+      print REPORT "Output specified: strand-specific (default)\n";
+    }
+
+    if ($no_overlap) {
+      print REPORT "No overlapping methylation calls specified\n";
+    }
+    if ($genomic_fasta) {
+      print REPORT "Genomic equivalent sequences will be printed out in FastA format\n";
+    }
+    if ($merge_non_CpG) {
+      print REPORT "Methylation in CHG and CHH context will be merged into \"non-CpG context\" output\n";
+    }
+
+    print REPORT "\n";
+  }
+
+#####   open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+
+  ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
+  ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
+  if ($full and $merge_non_CpG) {
+    my $cpg_output = my $other_c_output = $output_filename;
+    ### C in CpG context
+    $cpg_output =~ s/^/CpG_context_/;
+    $cpg_output =~ s/sam$/txt/;
+    $cpg_output =~ s/bam$/txt/;
+    $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
+    $cpg_output = $output_dir . $cpg_output;
+
+    if ($gzip){
+      $cpg_output .= '.gz';
+      open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n";
+    }
+    else{
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
+    push @sorting_files,$cpg_output;
+
+    unless ($no_header) {
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### C in any other context than CpG
+    $other_c_output =~ s/^/Non_CpG_context_/;
+    $other_c_output =~ s/sam$/txt/;
+    $other_c_output =~ s/bam$/txt/;
+    $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
+    $other_c_output = $output_dir . $other_c_output;
+
+    if ($gzip){
+      $other_c_output .= '.gz';
+      open ($fhs{other_context},"| gzip -c - > $other_c_output") or die "Failed to write to $other_c_output $! \n";
+    }
+    else{
+      open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in any other context to $other_c_output\n";
+    push @sorting_files,$other_c_output;
+
+
+    unless ($no_header) {
+      print {$fhs{other_context}} "Bismark methylation extractor version $version\n";
+    }
+  }
+
+  ### if only --merge_non_CpG was specified we will write out 8 different output files, depending on where the (first) unique best alignment has been found
+  elsif ($merge_non_CpG) {
+
+    my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
+
+    ### For cytosines in CpG context
+    $cpg_ot =~ s/^/CpG_OT_/;
+    $cpg_ot =~ s/sam$/txt/;
+    $cpg_ot =~ s/bam$/txt/;
+    $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
+    $cpg_ot = $output_dir . $cpg_ot;
+
+    if ($gzip){
+      $cpg_ot .= '.gz';
+      open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n";
+    }
+    else{
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
+    push @sorting_files,$cpg_ot;
+
+    unless($no_header){
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ctot =~ s/^/CpG_CTOT_/;
+    $cpg_ctot =~ s/sam$/txt/;
+    $cpg_ctot =~ s/bam$/txt/;
+    $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
+    $cpg_ctot = $output_dir . $cpg_ctot;
+
+    if ($gzip){
+      $cpg_ctot .= '.gz';
+      open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n";
+    }
+    else{
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
+    push @sorting_files,$cpg_ctot;
+
+    unless($no_header){
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ctob =~ s/^/CpG_CTOB_/;
+    $cpg_ctob =~ s/sam$/txt/;
+    $cpg_ctob =~ s/bam$/txt/;
+    $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
+    $cpg_ctob = $output_dir . $cpg_ctob;
+
+    if ($gzip){
+      $cpg_ctob .= '.gz';
+      open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n";
+    }
+    else{
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
+    push @sorting_files,$cpg_ctob;
+
+    unless($no_header){
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ob =~ s/^/CpG_OB_/;
+    $cpg_ob =~ s/sam$/txt/;
+    $cpg_ob =~ s/bam$/txt/;
+    $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
+    $cpg_ob = $output_dir . $cpg_ob;
+
+    if ($gzip){
+      $cpg_ob .= '.gz';
+      open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n";
+    }
+    else{
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
+    push @sorting_files,$cpg_ob;
+
+    unless($no_header){
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+    }
+
+    ### For cytosines in Non-CpG (CC, CT or CA) context
+    my $other_c_ot = my $other_c_ctot = my $other_c_ctob = my $other_c_ob = $output_filename;
+
+    $other_c_ot =~ s/^/Non_CpG_OT_/;
+    $other_c_ot =~ s/sam$/txt/;
+    $other_c_ot =~ s/bam$/txt/;
+    $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
+    $other_c_ot = $output_dir . $other_c_ot;
+
+    if ($gzip){
+      $other_c_ot .= '.gz';
+      open ($fhs{0}->{other_c},"| gzip -c - > $other_c_ot") or die "Failed to write to $other_c_ot $!\n";
+    }
+    else{
+      open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n";
+    push @sorting_files,$other_c_ot;
+
+    unless($no_header){
+      print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n";
+    }
+
+    $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
+    $other_c_ctot =~ s/sam$/txt/;
+    $other_c_ctot =~ s/bam$/txt/;
+    $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
+    $other_c_ctot = $output_dir . $other_c_ctot;
+
+    if ($gzip){
+      $other_c_ctot .= '.gz';
+      open ($fhs{1}->{other_c},"| gzip -c - > $other_c_ctot") or die "Failed to write to $other_c_ctot $!\n";
+    }
+    else{
+      open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n";
+    push @sorting_files,$other_c_ctot;
+
+    unless($no_header){
+      print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n";
+    }
+
+    $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
+    $other_c_ctob =~ s/sam$/txt/;
+    $other_c_ctob =~ s/bam$/txt/;
+    $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
+    $other_c_ctob = $output_dir . $other_c_ctob;
+
+    if ($gzip){
+      $other_c_ctob .= '.gz';
+      open ($fhs{2}->{other_c},"| gzip -c - > $other_c_ctob") or die "Failed to write to $other_c_ctob $!\n";
+    }
+    else{
+      open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n";
+    push @sorting_files,$other_c_ctob;
+
+    unless($no_header){
+      print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n";
+    }
+
+    $other_c_ob =~ s/^/Non_CpG_OB_/;
+    $other_c_ob =~ s/sam$/txt/;
+    $other_c_ob =~ s/sam$/txt/;
+    $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
+    $other_c_ob = $output_dir . $other_c_ob;
+
+    if ($gzip){
+      $other_c_ob .= '.gz';
+      open ($fhs{3}->{other_c},"| gzip -c - > $other_c_ob") or die "Failed to write to $other_c_ob $!\n";
+    }
+    else{
+      open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n";
+    push @sorting_files,$other_c_ob;
+
+    unless($no_header){
+      print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n";
+    }
+  }
+  ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
+
+  ### if --comprehensive was specified we are only writing one file per context
+  elsif ($full) {
+    my $cpg_output = my $chg_output =  my $chh_output = $output_filename;
+    ### C in CpG context
+    $cpg_output =~ s/^/CpG_context_/;
+    $cpg_output =~ s/sam$/txt/;
+    $cpg_output =~ s/bam$/txt/;
+    $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
+    $cpg_output = $output_dir . $cpg_output;
+
+    if ($gzip){
+      $cpg_output .= '.gz';
+      open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n";
+    }
+    else{
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
+    push @sorting_files,$cpg_output;
+
+    unless($no_header){
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### C in CHG context
+    $chg_output =~ s/^/CHG_context_/;
+    $chg_output =~ s/sam$/txt/;
+    $chg_output =~ s/bam$/txt/;
+    $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
+    $chg_output = $output_dir . $chg_output;
+
+    if ($gzip){
+      $chg_output .= '.gz';
+      open ($fhs{CHG_context},"| gzip -c - > $chg_output") or die "Failed to write to $chg_output $!\n";
+    }
+    else{
+      open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context to $chg_output\n";
+    push @sorting_files,$chg_output;
+
+    unless($no_header){
+      print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### C in CHH context
+    $chh_output =~ s/^/CHH_context_/;
+    $chh_output =~ s/sam$/txt/;
+    $chh_output =~ s/bam$/txt/;
+    $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
+    $chh_output = $output_dir . $chh_output;
+
+    if ($gzip){
+      $chh_output .= '.gz';
+      open ($fhs{CHH_context},"| gzip -c - > $chh_output") or die "Failed to write to $chh_output $!\n";
+    }
+    else{
+      open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context to $chh_output\n";
+    push @sorting_files, $chh_output;
+
+    unless($no_header){
+      print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n";
+    }
+  }
+  ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
+  else {
+    my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
+
+    ### For cytosines in CpG context
+    $cpg_ot =~ s/^/CpG_OT_/;
+    $cpg_ot =~ s/sam$/txt/;
+    $cpg_ot =~ s/bam$/txt/;
+    $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
+    $cpg_ot = $output_dir . $cpg_ot;
+
+    if ($gzip){
+      $cpg_ot .= '.gz';
+      open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n";
+    }
+    else{
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
+    push @sorting_files,$cpg_ot;
+
+    unless($no_header){
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ctot =~ s/^/CpG_CTOT_/;
+    $cpg_ctot =~ s/sam$/txt/;
+    $cpg_ctot =~ s/bam$/txt/;
+    $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
+    $cpg_ctot = $output_dir . $cpg_ctot;
+
+    if ($gzip){
+      $cpg_ctot .= '.gz';
+      open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n";
+    }
+    else{
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
+    push @sorting_files,$cpg_ctot;
+
+    unless($no_header){
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ctob =~ s/^/CpG_CTOB_/;
+    $cpg_ctob =~ s/sam$/txt/;
+    $cpg_ctob =~ s/bam$/txt/;
+    $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
+    $cpg_ctob = $output_dir . $cpg_ctob;
+
+    if ($gzip){
+      $cpg_ctob .= '.gz';
+      open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n";
+    }
+    else{
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
+    push @sorting_files,$cpg_ctob;
+
+    unless($no_header){
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+    }
+
+    $cpg_ob =~ s/^/CpG_OB_/;
+    $cpg_ob =~ s/sam$/txt/;
+    $cpg_ob =~ s/bam$/txt/;
+    $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
+    $cpg_ob = $output_dir . $cpg_ob;
+
+    if ($gzip){
+      $cpg_ob .= '.gz';
+      open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n";
+    }
+    else{
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
+    push @sorting_files,$cpg_ob;
+
+    unless($no_header){
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+    }
+
+    ### For cytosines in CHG context
+    my $chg_ot = my $chg_ctot = my $chg_ctob = my $chg_ob = $output_filename;
+
+    $chg_ot =~ s/^/CHG_OT_/;
+    $chg_ot =~ s/sam$/txt/;
+    $chg_ot =~ s/bam$/txt/;
+    $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
+    $chg_ot = $output_dir . $chg_ot;
+
+    if ($gzip){
+      $chg_ot .= '.gz';
+      open ($fhs{0}->{CHG},"| gzip -c - > $chg_ot") or die "Failed to write to $chg_ot $!\n";
+    }
+    else{
+      open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n";
+    push @sorting_files,$chg_ot;
+
+    unless($no_header){
+      print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chg_ctot =~ s/^/CHG_CTOT_/;
+    $chg_ctot =~ s/sam$/txt/;
+    $chg_ctot =~ s/bam$/txt/;
+    $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
+    $chg_ctot = $output_dir . $chg_ctot;
+
+    if ($gzip){
+      $chg_ctot .= '.gz';
+      open ($fhs{1}->{CHG},"| gzip -c - > $chg_ctot") or die "Failed to write to $chg_ctot $!\n";
+    }
+    else{
+      open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n";
+    push @sorting_files,$chg_ctot;
+
+    unless($no_header){
+      print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chg_ctob =~ s/^/CHG_CTOB_/;
+    $chg_ctob =~ s/sam$/txt/;
+    $chg_ctob =~ s/bam$/txt/;
+    $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
+    $chg_ctob = $output_dir . $chg_ctob;
+
+    if ($gzip){
+      $chg_ctob .= '.gz';
+      open ($fhs{2}->{CHG},"| gzip -c - > $chg_ctob") or die "Failed to write to $chg_ctob $!\n";
+    }
+    else{
+      open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n";
+    push @sorting_files,$chg_ctob;
+
+    unless($no_header){
+      print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chg_ob =~ s/^/CHG_OB_/;
+    $chg_ob =~ s/sam$/txt/;
+    $chg_ob =~ s/bam$/txt/;
+    $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
+    $chg_ob = $output_dir . $chg_ob;
+
+    if ($gzip){
+      $chg_ob .= '.gz';
+      open ($fhs{3}->{CHG},"| gzip -c - > $chg_ob") or die "Failed to write to $chg_ob $!\n";
+    }
+    else{
+      open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n";
+    push @sorting_files,$chg_ob;
+
+    unless($no_header){
+      print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### For cytosines in CHH context
+    my $chh_ot = my $chh_ctot = my $chh_ctob = my $chh_ob = $output_filename;
+
+    $chh_ot =~ s/^/CHH_OT_/;
+    $chh_ot =~ s/sam$/txt/;
+    $chh_ot =~ s/bam$/txt/;
+    $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
+    $chh_ot = $output_dir . $chh_ot;
+
+    if ($gzip){
+      $chh_ot .= '.gz';
+      open ($fhs{0}->{CHH},"| gzip -c - > $chh_ot") or die "Failed to write to $chh_ot $!\n";
+    }
+    else{
+      open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n";
+    push @sorting_files,$chh_ot;
+
+    unless($no_header){
+      print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chh_ctot =~ s/^/CHH_CTOT_/;
+    $chh_ctot =~ s/sam$/txt/;
+    $chh_ctot =~ s/bam$/txt/;
+    $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
+    $chh_ctot = $output_dir . $chh_ctot;
+
+    if ($gzip){
+      $chh_ctot .= '.gz';
+      open ($fhs{1}->{CHH},"| gzip -c - > $chh_ctot") or die "Failed to write to $chh_ctot $!\n";
+    }
+    else{
+      open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n";
+    push @sorting_files,$chh_ctot;
+
+    unless($no_header){
+      print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chh_ctob =~ s/^/CHH_CTOB_/;
+    $chh_ctob =~ s/sam$/txt/;
+    $chh_ctob =~ s/bam$/txt/;
+    $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
+    $chh_ctob = $output_dir . $chh_ctob;
+
+    if ($gzip){
+      $chh_ctob .= '.gz';
+      open ($fhs{2}->{CHH},"| gzip -c - > $chh_ctob") or die "Failed to write to $chh_ctob $!\n";
+    }
+    else{
+      open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n";
+    push @sorting_files,$chh_ctob;
+
+    unless($no_header){
+      print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n";
+    }
+
+    $chh_ob =~ s/^/CHH_OB_/;
+    $chh_ob =~ s/sam$/txt/;
+    $chh_ob =~ s/bam$/txt/;
+    $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
+    $chh_ob = $output_dir . $chh_ob;
+
+    if ($gzip){
+      $chh_ob .= '.gz';
+      open ($fhs{3}->{CHH},"| gzip -c - > $chh_ob") or die "Failed to write to $chh_ob $!\n";
+    }
+    else{
+      open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n";
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n";
+    push @sorting_files,$chh_ob;
+
+    unless($no_header){
+      print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n";
+    }
+  }
+
+  my $methylation_call_strings_processed = 0;
+  my $line_count = 0;
+
+  ### proceeding differently now for single-end or paired-end Bismark files
+
+  ### PROCESSING SINGLE-END RESULT FILES
+  if ($single) {
+
+    ### also proceeding differently now for SAM format or vanilla Bismark format files
+    if ($vanilla) {		# old vanilla Bismark output format
+      while (<IN>) {
+	++$line_count;
+	warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
+	my ($id,$strand,$chrom,$start,$seq,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,6,7,8,9];
+	
+	### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the first or
+	### last position
+	chomp $genome_conversion;
+
+	my $index;
+	if ($meth_call) {
+
+	  if ($read_conversion eq 'CT' and $genome_conversion eq 'CT') { ## original top strand
+	    $index = 0;
+	  } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT') { ## complementary to original top strand
+	    $index = 1;
+	  } elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA') { ## original bottom strand
+	    $index = 3;
+	  } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA') { ## complementary to original bottom strand
+	    $index = 2;
+	  } else {
+	    die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
+	  }
+	
+	  ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
+	  if ($ignore) {
+	    $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);	
+	
+	    ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
+	    if ($strand eq '+') {
+	      $start += $ignore;
+	    } elsif ($strand eq '-') {
+	      $start += length($meth_call)-1; ## $meth_call is already shortened!
+	    } else {
+	      die "Alignment did not have proper strand information: $strand\n";
+	    }
+	  }
+	  ### printing out the methylation state of every C in the read
+	  print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
+	
+	  ++$methylation_call_strings_processed; # 1 per single-end result
+	}
+      }
+    } else {		  # processing single-end SAM format (default)
+      while (<IN>) {
+	### SAM format can either start with header lines (starting with @) or start with alignments directly
+	if (/^\@/) {	     # skipping header lines (starting with @)
+	  warn "skipping SAM header line:\t$_";
+	  next;
+	}
+
+	++$line_count;
+	warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	# example read in SAM format
+	# 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
+	###
+
+	# < 0.7.6 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+	# < 0.7.6 $meth_call =~ s/^XM:Z://;
+	# < 0.7.6 $read_conversion =~ s/^XR:Z://;
+	# < 0.7.6 $genome_conversion =~ s/^XG:Z://;	
+
+	my ($id,$chrom,$start,$cigar) = (split("\t"))[0,2,3,5];
+
+	### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+	my $meth_call;	  ### Thanks to Zachary Zeno for this solution
+	my $read_conversion;
+	my $genome_conversion;
+	
+	while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
+	  my $tag = $1;
+	  my $value = $2;
+
+	  if ($tag eq "XM") {
+	    $meth_call = $value;
+	    $meth_call =~ s/\r//;
+	  } elsif ($tag eq "XR") {
+	    $read_conversion = $value;
+	    $read_conversion =~ s/\r//;
+	  } elsif ($tag eq "XG") {
+	    $genome_conversion = $value;
+	    $genome_conversion =~ s/\r//;
+	  }
+	}
+
+	my $strand;
+	chomp $genome_conversion;
+	# print "$meth_call\n$read_conversion\n$genome_conversion\n";
+	
+	my $index;
+	if ($meth_call) {
+	  if ($read_conversion eq 'CT' and $genome_conversion eq 'CT') { ## original top strand
+	    $index = 0;
+	    $strand = '+';
+	  } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT') { ## complementary to original top strand
+	    $index = 1;
+	    $strand = '-';
+	  } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA') { ## complementary to original bottom strand
+	    $index = 2;
+	    $strand = '+';
+	  } elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA') { ## original bottom strand
+	    $index = 3;
+	    $strand = '-';
+	  } else {
+	    die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
+	  }
+	
+	  ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
+	  if ($strand eq '-') {
+	    $meth_call = reverse $meth_call;
+	  }
+	
+	  ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
+	  if ($ignore) {
+	    # print "\n\n$meth_call\n";
+	    $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);	
+	    # print "$meth_call\n";
+	    ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
+
+	    my @len = split (/\D+/,$cigar); # storing the length per operation
+	    my @ops = split (/\d+/,$cigar); # storing the operation
+	    shift @ops;		# remove the empty first element
+	    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+		
+	    my @comp_cigar; # building an array with all CIGAR operations
+	    foreach my $index (0..$#len) {
+	      foreach (1..$len[$index]) {
+		# print  "$ops[$index]";
+		push @comp_cigar, $ops[$index];
+	      }
+	    }
+	    # print "original CIGAR: $cigar\n";
+	    # print "original CIGAR: @comp_cigar\n";
+
+	    ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
+	    if ($strand eq '+') {
+	
+	      my $D_count = 0; # counting all deletions that affect the ignored genomic position, i.e. Deletions and insertions
+	      my $I_count = 0;
+
+	      for (1..$ignore) {
+		my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations from the start
+		# print "$_ deleted $op\n";
+
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $D_count++;
+		  $op = shift @comp_cigar;
+		  # print "$_ deleted $op\n";
+		}
+		if ($op eq 'I') { # adjusting the genomic position for insertions (I)
+		  $I_count++;
+		}
+	      }
+	      $start += $ignore + $D_count - $I_count;
+	      # print "start $start\t ignore: $ignore\t D count: $D_count I_count: $I_count\n";
+	    } elsif ($strand eq '-') {
+
+	      for (1..$ignore) {
+		my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $op = pop @comp_cigar;
+		}
+	      }
+
+	      ### For reverse strand alignments we need to determine the number of matching bases (M) or deletions (D) in the read from the CIGAR
+	      ### string to be able to work out the starting position of the read which is on the 3' end of the sequence
+	      my $MD_count = 0;	# counting all operations that affect the genomic position, i.e. M and D. Insertions do not affect the start position
+	      foreach (@comp_cigar) {
+		++$MD_count if ($_ eq 'M' or $_ eq 'D');
+	      }
+	      $start += $MD_count - 1;
+	    }
+	
+	    ### reconstituting shortened CIGAR string
+	    my $new_cigar;
+	    my $count = 0;
+	    my $last_op;
+	    # print "ignore adjusted: @comp_cigar\n";
+	    foreach my $op (@comp_cigar) {
+	      unless (defined $last_op){
+		$last_op = $op;
+		++$count;
+		next;
+	      }
+	      if ($last_op eq $op) {
+		++$count;
+	      } else {
+		$new_cigar .= "$count$last_op";
+		$last_op = $op;
+		$count = 1;
+	      }
+	    }
+	    $new_cigar .= "$count$last_op"; # appending the last operation and count
+	    $cigar = $new_cigar;
+	    # print "ignore adjusted scalar: $cigar\n";
+	  }
+	}
+	### printing out the methylation state of every C in the read
+	print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index,$cigar);
+	
+	++$methylation_call_strings_processed; # 1 per single-end result
+      }
+    }
+  }
+
+  ### PROCESSING PAIRED-END RESULT FILES
+  elsif ($paired) {
+
+    ### proceeding differently now for SAM format or vanilla Bismark format files
+    if ($vanilla) {	# old vanilla Bismark paired-end output format
+      while (<IN>) {
+	++$line_count;
+	warn "processed line: $line_count\n" if ($line_count%500000==0);
+
+	### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
+	my ($id,$strand,$chrom,$start_read_1,$end_read_2,$seq_1,$meth_call_1,$seq_2,$meth_call_2,$first_read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,4,6,7,9,10,11,12,13];
+
+	my $index;
+	chomp $genome_conversion;
+	
+	if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT') {
+	  $index = 0;		## this is OT
+	} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA') {
+	  $index = 2;		## this is CTOB!!!
+	} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT') {
+	  $index = 1;		## this is CTOT!!!
+	} elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA') {
+	  $index = 3;		## this is OB
+	} else {
+	  die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
+	}
+	
+	if ($meth_call_1 and $meth_call_2) {
+	  ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
+	  if ($ignore) {
+	    $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
+	    $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
+
+	    ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore' was specified
+	    $start_read_1 += $ignore;
+	    $end_read_2   -= $ignore;
+	  }
+	  my $end_read_1;
+	  my $start_read_2;
+
+	  if ($strand eq '+') {
+
+	    $end_read_1 = $start_read_1+length($meth_call_1)-1;
+	    $start_read_2 = $end_read_2-length($meth_call_2)+1;
+		
+	    ## we first pass the first read which is in + orientation on the forward strand
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0);
+	
+	    # we next pass the second read which is in - orientation on the reverse strand
+	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1);
+	  } else {
+	
+	    $end_read_1 = $start_read_1+length($meth_call_2)-1;	# read 1 is the second reported read!
+	    $start_read_2 = $end_read_2-length($meth_call_1)+1;	# read 2 is the first reported read!
+
+	    ## we first pass the first read which is in - orientation on the reverse strand
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0);
+
+	    # we next pass the second read which is in + orientation on the forward strand
+	    ### if --no_overlap was specified we also pass the end of read 2. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2);
+	  }
+	
+	  $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
+	}	
+      }
+    } else {	      # Bismark paired-end SAM output format (default)
+      while (<IN>) {
+	### SAM format can either start with header lines (starting with @) or start with alignments directly
+	if (/^\@/) {	     # skipping header lines (starting with @)
+	  warn "skipping SAM header line:\t$_";
+	  next;
+	}
+	
+	++$line_count;
+	warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	# example paired-end reads in SAM format (2 consecutive lines)
+	# 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
+	# 1_R1/2	131	5	103172417	255	40M	=	103172224	-233	TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:6	XX:Z:T5T1T9T9T7T3	XM:Z:h.....h.h.........h.........h.......h...	XR:Z:GA	XG:Z:CT
+	
+	#  < version 0.7.6 my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+
+	my ($id_1,$chrom,$start_read_1,$cigar_1) = (split("\t"))[0,2,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+	my $meth_call_1;
+	my $first_read_conversion;
+	my $genome_conversion;
+	
+	while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
+	  my $tag = $1;
+	  my $value = $2;
+
+	  if ($tag eq "XM") {
+	    $meth_call_1 = $value;
+	    $meth_call_1 =~ s/\r//;
+	  } elsif ($tag eq "XR") {
+	    $first_read_conversion = $value;
+	    $first_read_conversion =~ s/\r//;
+	  } elsif ($tag eq "XG") {
+	    $genome_conversion = $value;
+	    $genome_conversion =~ s/\r//;
+	  }
+	}
+
+	$_ = <IN>;		# reading in the paired read
+
+	# < version 0.7.6 my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
+	# < version 0.7.6 $meth_call_1 =~ s/^XM:Z://;
+	# < version 0.7.6 $meth_call_2 =~ s/^XM:Z://;
+	# < version 0.7.6 $first_read_conversion =~ s/^XR:Z://;
+	# < version 0.7.6 $second_read_conversion =~ s/^XR:Z://;
+
+	my ($id_2,$start_read_2,$cigar_2) = (split("\t"))[0,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+
+	my $meth_call_2;
+	my $second_read_conversion;
+	
+	while ( /(XM|XR):Z:([^\t]+)/g ) {
+	  my $tag = $1;
+	  my $value = $2;
+
+	  if ($tag eq "XM") {
+	    $meth_call_2 = $value;
+	    $meth_call_2 =~ s/\r//;
+	  } elsif ($tag eq "XR") {
+	    $second_read_conversion = $value;
+	    $second_read_conversion = s/\r//;
+	  }
+	}
+	
+	# < version 0.7.6 $genome_conversion =~ s/^XG:Z://;	
+	chomp $genome_conversion; # in case it captured a new line character	
+
+	# print join ("\t",$meth_call_1,$meth_call_2,$first_read_conversion,$second_read_conversion,$genome_conversion),"\n";
+
+	my $index;
+	my $strand;
+
+	if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT') {
+	  $index = 0;		## this is OT
+	  $strand = '+';
+	} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT') {
+	  $index = 1;		## this is CTOT
+	  $strand = '-';
+	} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA') {
+	  $index = 2;		## this is CTOB
+	  $strand = '+';
+	} elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA') {
+	  $index = 3;		## this is OB
+	  $strand = '-';
+	} else {
+	  die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
+	}
+
+	### reversing the methylation call of the read that was reverse-complemented
+	if ($strand eq '+') {
+	  $meth_call_2 = reverse $meth_call_2;
+	} else {
+	  $meth_call_1 = reverse $meth_call_1;
+	}
+
+	if ($meth_call_1 and $meth_call_2) {
+
+	  my $end_read_1;
+	
+	  ### READ 1
+	  my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
+	  my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
+	  shift @ops_1;		# remove the empty first element
+	  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
+	
+	  my @comp_cigar_1; # building an array with all CIGAR operations
+	  foreach my $index (0..$#len_1) {
+	    foreach (1..$len_1[$index]) {
+	      # print  "$ops_1[$index]";
+	      push @comp_cigar_1, $ops_1[$index];
+	    }
+	  }
+	  # print "original CIGAR read 1: $cigar_1\n";
+	  # print "original CIGAR read 1: @comp_cigar_1\n";
+
+	  ### READ 2
+	  my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
+	  my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
+	  shift @ops_2;		# remove the empty first element
+	  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
+	  my @comp_cigar_2; # building an array with all CIGAR operations for read 2
+	  foreach my $index (0..$#len_2) {
+	    foreach (1..$len_2[$index]) {
+	      # print  "$ops_2[$index]";
+	      push @comp_cigar_2, $ops_2[$index];
+	    }
+	  }
+	  # print "original CIGAR read 2: $cigar_2\n";
+	  # print "original CIGAR read 2: @comp_cigar_2\n";
+	
+	  if ($ignore) {
+	    ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'	
+	    ### the methylation calls have already been reversed where necessary
+	    $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
+	    $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
+
+	    ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
+
+	    if ($strand eq '+') {
+		
+	      ### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
+	      my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1, i.e. Deletions and insertions
+	      my $I_count_1 = 0;
+
+	      for (1..$ignore) {
+		my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
+		# print "$_ deleted $op\n";
+		
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $D_count_1++;
+		  $op = shift @comp_cigar_1;
+		  # print "$_ deleted $op\n";
+		}
+		if ($op eq 'I') { # adjusting the genomic position for insertions (I)
+		  $I_count_1++;
+		}
+	      }
+		
+	      $start_read_1 += $ignore + $D_count_1 - $I_count_1;
+	      # print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tI_count 1: $I_count_1\n";
+	
+	      ### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
+
+	      for (1..$ignore) {
+		my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $op = pop @comp_cigar_2;
+		}
+	      }
+	      # the start position of reads mapping to the reverse strand is being adjusted further below
+	    } elsif ($strand eq '-') {
+	
+	      ### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
+	      for (1..$ignore) {
+		my $op = pop @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $op = pop @comp_cigar_1;
+		}
+	      }
+	      # the start position of reads mapping to the reverse strand is being adjusted further below
+
+	      ### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
+	      my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
+	      my $I_count_2 = 0;
+		
+	      for (1..$ignore) {
+		my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
+		# print "$_ deleted $op\n";
+		
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $D_count_2++;
+		  $op = shift @comp_cigar_2;
+		  # print "$_ deleted $op\n";
+		}
+		if ($op eq 'I') { # adjusting the genomic position for insertions (I)
+		  $I_count_2++;
+		}
+	      }
+		
+	      $start_read_2 += $ignore + $D_count_2 - $I_count_2;
+	      # print "start read 2 $start_read_2\t ignore: $ignore\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
+	
+	    }
+	
+	    ### reconstituting shortened CIGAR string 1
+	    my $new_cigar_1;
+	    my $count_1 = 0;
+	    my $last_op_1;
+	    # print "ignore adjusted CIGAR 1: @comp_cigar_1\n";
+	    foreach my $op (@comp_cigar_1) {
+	      unless (defined $last_op_1){
+		$last_op_1 = $op;
+		++$count_1;
+		next;
+	      }
+	      if ($last_op_1 eq $op) {
+		++$count_1;
+	      } else {
+		$new_cigar_1 .= "$count_1$last_op_1";
+		$last_op_1 = $op;
+		$count_1 = 1;
+	      }
+	    }
+	    $new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
+	    $cigar_1 = $new_cigar_1;
+	    # print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
+
+	    ### reconstituting shortened CIGAR string 2
+	    my $new_cigar_2;
+	    my $count_2 = 0;
+	    my $last_op_2;
+	    # print "ignore adjusted CIGAR 2: @comp_cigar_2\n";
+	    foreach my $op (@comp_cigar_2) {
+	      unless (defined $last_op_2){
+		$last_op_2 = $op;
+		++$count_2;
+		next;
+	      }
+	      if ($last_op_2 eq $op) {
+		++$count_2;
+	      } else {
+		$new_cigar_2 .= "$count_2$last_op_2";
+		$last_op_2 = $op;
+		$count_2 = 1;
+	      }
+	    }
+	    $new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
+	    $cigar_2 = $new_cigar_2;
+	    # print "ignore adjusted CIGAR 2 scalar: $cigar_2\n";
+
+	  }
+	
+	  if ($strand eq '+') {
+	    ### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
+	    @comp_cigar_2  = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+	    # print "reverse: @comp_cigar_2\n";
+	
+	    my $MD_count_1 = 0;
+	    foreach (@comp_cigar_1) {
+	      ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	    }
+
+	    my $MD_count_2 = 0;
+	    foreach (@comp_cigar_2) {
+	      ++$MD_count_2 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	    }
+
+	    $end_read_1 = $start_read_1 + $MD_count_1 - 1;
+	    $start_read_2 += $MD_count_2 - 1; ## Passing on the start position on the reverse strand
+	  } else {
+	    ### adjusting the start position for all reads mapping to the reverse strand, in this case read 1
+	
+	    @comp_cigar_1  = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+	    # print "reverse: @comp_cigar_1\n";
+
+	    my $MD_count_1 = 0;
+	    foreach (@comp_cigar_1) {
+	      ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	    }
+
+	    $end_read_1 = $start_read_1;	
+	    $start_read_1 +=  $MD_count_1 - 1; ### Passing on the start position on the reverse strand
+
+	  }
+
+	  if ($strand eq '+') {
+	    ## we first pass the first read which is in + orientation on the forward strand
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1);
+	
+	    # we next pass the second read which is in - orientation on the reverse strand
+	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2);
+	  } else {
+	    ## we first pass the first read which is in - orientation on the reverse strand
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1);
+	
+	    # we next pass the second read which is in + orientation on the forward strand
+	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2);
+	  }
+	
+	  $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
+	}	
+      }
+    }
+  } else {
+    die "Single-end or paired-end reads not specified properly\n";
+  }
+
+  print "\n\nProcessed $line_count lines from $filename in total\n";
+  print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+  if ($report) {
+    print REPORT "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+  }
+  print_splitting_report ();
+}
+
+
+
+sub print_splitting_report{
+
+  ### Calculating methylation percentages if applicable
+
+  my $percent_meCpG;
+  if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
+    $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
+  }
+
+  my $percent_meCHG;
+  if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
+    $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
+  }
+
+  my $percent_meCHH;
+  if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
+    $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
+  }
+
+  my $percent_non_CpG_methylation;
+  if ($merge_non_CpG){
+    if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
+      $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
+    }
+  }
+
+  if ($report){
+    ### detailed information about Cs analysed
+    print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
+
+    my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+    print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
+
+    print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+    print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+    print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+
+    print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+    print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+    print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+
+    ### calculating methylated CpG percentage if applicable
+    if ($percent_meCpG){
+      print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
+    }
+    else{
+      print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+    }
+
+    ### 2-Context Output
+    if ($merge_non_CpG){
+      if ($percent_non_CpG_methylation){
+	print REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
+      }
+    }
+
+    ### 3 Context Output
+    else{
+      ### calculating methylated CHG percentage if applicable
+      if ($percent_meCHG){
+	print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+      }
+
+      ### calculating methylated CHH percentage if applicable
+      if ($percent_meCHH){
+	print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+      }
+    }
+  }
+
+  ### detailed information about Cs analysed for on-screen report
+  print "Final Cytosine Methylation Report\n",'='x33,"\n";
+
+  my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+  print "Total number of C's analysed:\t$total_number_of_C\n\n";
+
+  print "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+  print "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+  print "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+
+  print "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  print "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  print "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+
+  ### printing methylated CpG percentage if applicable
+  if ($percent_meCpG){
+    print "C methylated in CpG context:\t${percent_meCpG}%\n";
+  }
+  else{
+    print "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+  }
+
+  ### 2-Context Output
+  if ($merge_non_CpG){
+    if ($percent_non_CpG_methylation){
+      print "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
+    }
+  }
+
+  ### 3-Context Output
+  else{
+    ### printing methylated CHG percentage if applicable
+    if ($percent_meCHG){
+      print "C methylated in CHG context:\t${percent_meCHG}%\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+    }
+
+    ### printing methylated CHH percentage if applicable
+    if ($percent_meCHH){
+      print "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+    }
+  }
+}
+
+
+
+
+
+sub print_individual_C_methylation_states_paired_end_files{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  my $methyl_CHG_count = 0;
+  my $methyl_CHH_count = 0;
+  my $methyl_CpG_count = 0;
+  my $unmethylated_CHG_count = 0;
+  my $unmethylated_CHH_count = 0;
+  my $unmethylated_CpG_count = 0;
+
+
+  my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
+  my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
+  my @comp_cigar;
+
+  ### Checking whether the CIGAR string is a linear genomic match or whether if requires indel processing
+  if ($cigar =~ /^\d+M$/){
+  }
+  else{ # parsing CIGAR string
+    my @len;
+    my @ops;
+    @len = split (/\D+/,$cigar); # storing the length per operation
+    @ops = split (/\d+/,$cigar); # storing the operation
+    shift @ops; # remove the empty first element
+    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+    foreach my $index (0..$#len){
+      foreach (1..$len[$index]){
+	# print  "$ops[$index]";
+	push @comp_cigar, $ops[$index];
+      }
+    }
+    # warn "\nDetected CIGAR string: $cigar\n";
+    # warn "Length of methylation call: ",length $meth_call,"\n";
+    # warn "number of operations: ",scalar @ops,"\n";
+    # warn "number of length digits: ",scalar @len,"\n\n";
+    # print @comp_cigar,"\n";
+    # print "$meth_call\n\n";
+    # sleep (1);
+  }
+
+  if ($strand eq '-') {
+
+    ### the  CIGAR string needs to be reversed, the methylation call has already been reversed above
+    if (@comp_cigar){
+      @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+    }
+    #  print "reverse CIGAR string: @comp_cigar\n";
+
+    ### the start position of paired-end files has already been corrected, see above
+  }
+
+  ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified
+
+  if ($merge_non_CpG) {
+
+    if ($no_overlap) {
+
+      ### single-file CpG and non-CpG context output
+      if ($full) {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start+$index+$pos_offset >= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.'){}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start-$index+$pos_offset <= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The read orientation was neither + nor -: '$strand'\n";
+	}
+      }
+
+      ### strand-specific methylation output
+      else {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start+$index+$pos_offset >= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }	
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start-$index+$pos_offset <= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation was neither + nor -: '$strand'/n";
+	}
+      }
+    }
+
+    ### this is the default paired-end procedure allowing overlaps and using every single C position
+    ### Still within the 2-CONTEXT ONLY optional section
+    else {
+      ### single-file CpG and non-CpG context output
+      if ($full) {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation as neither + nor -: '$strand'\n";
+	}
+      }
+
+      ### strand-specific methylation output
+      ### still within the 2-CONTEXT optional section
+      else {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation as neither + nor -: '$strand'\n";
+	}
+      }
+    }
+  }
+
+  ############################################
+  ### THIS IS THE DEFAULT 3-CONTEXT OUTPUT ###
+  ############################################
+
+  elsif ($no_overlap) {
+    ### single-file CpG, CHG and CHH context output
+    if ($full) {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+	
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start+$index+$pos_offset >= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start-$index+$pos_offset <= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+
+    ### strand-specific methylation output
+    else {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start+$index+$pos_offset >= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }	
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start-$index+$pos_offset <= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+  }
+
+  ### this is the default paired-end procedure allowing overlaps and using every single C position
+  else {
+    ### single-file CpG, CHG and CHH context output
+    if ($full) {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+
+    ### strand-specific methylation output
+    else {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+  }
+}
+
+sub check_cigar_string {
+  my ($index,$cigar_offset,$pos_offset,$strand,$comp_cigar) = @_;
+  # print "$index\t$cigar_offset\t$pos_offset\t$strand\t";
+  my ($new_cigar_offset,$new_pos_offset) = (0,0);
+
+  if ($strand eq '+') {
+    #  print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
+
+    if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+      #  warn "position needs no adjustment\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
+      $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
+      # warn "adjusted genomic position by -1 bp (insertion)\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+      $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+      $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
+      # warn "adjusted genomic position by +1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
+
+      while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
+	if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+	  #  warn "position needs no adjustment\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
+	  $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
+	  # warn "adjusted genomic position by another -1 bp (insertion)\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+	  $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+	  $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
+	  # warn "adjusted genomic position by another +1 bp (deletion)\n";
+	}
+	else{
+	  die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+	}
+      }
+    }
+    else{
+      die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+    }
+  }
+
+  elsif ($strand eq '-') {
+    # print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
+
+    if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+     # warn "position needs no adjustment\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
+      $new_pos_offset += 1; # we need to add the length of inserted bases to the genomic position
+      # warn "adjusted genomic position by +1 bp (insertion)\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+      $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+      $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
+      # warn "adjusted genomic position by -1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
+
+      while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
+	if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+	  # warn "Found new 'M' operation; position needs no adjustment\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
+	  $new_pos_offset += 1; # we need to subtract the length of inserted bases from the genomic position
+	  # warn "Found new 'I' operation; adjusted genomic position by another +1 bp (insertion)\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+	  $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+	  $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
+	  # warn "adjusted genomic position by another -1 bp (deletion)\n";
+	}
+	else{
+	  die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+	}
+      }
+    }
+    else{
+      die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+    }
+  }
+  # print "new cigar offset: $new_cigar_offset\tnew pos offset: $new_pos_offset\n";
+  return ($new_cigar_offset,$new_pos_offset);
+}
+
+sub print_individual_C_methylation_states_single_end{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  my $methyl_CHG_count = 0;
+  my $methyl_CHH_count = 0;
+  my $methyl_CpG_count = 0;
+  my $unmethylated_CHG_count = 0;
+  my $unmethylated_CHH_count = 0;
+  my $unmethylated_CpG_count = 0;
+
+  my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
+  my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
+
+  my @comp_cigar;
+
+  if ($cigar){ # parsing CIGAR string
+
+    ### Checking whether the CIGAR string is a linear genomic match or whether if requires indel processing
+    if ($cigar =~ /^\d+M$/){
+      #  warn "See!? I told you so! $cigar\n";
+      # sleep(1);
+    }
+    else{
+
+      my @len;
+      my @ops;
+
+      @len = split (/\D+/,$cigar); # storing the length per operation
+      @ops = split (/\d+/,$cigar); # storing the operation
+      shift @ops; # remove the empty first element
+      die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+      
+      foreach my $index (0..$#len){
+	foreach (1..$len[$index]){
+	  # print  "$ops[$index]";
+	  push @comp_cigar, $ops[$index];
+	}
+      }
+    }
+    # warn "\nDetected CIGAR string: $cigar\n";
+    # warn "Length of methylation call: ",length $meth_call,"\n";
+    # warn "number of operations: ",scalar @ops,"\n";
+    # warn "number of length digits: ",scalar @len,"\n\n";
+    # print @comp_cigar,"\n";
+    # print "$meth_call\n\n";
+    # sleep (1);
+  }
+  
+  ### adjusting the start position for all reads mapping to the reverse strand
+  if ($strand eq '-') {
+
+    if (@comp_cigar){ # only needed for SAM reads with InDels
+      @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+      # print @comp_cigar,"\n";
+    }
+
+    unless ($ignore){  ### if --ignore was specified the start position has already been corrected
+      
+      if ($cigar){ ### SAM format
+	if ($cigar =~ /^(\d+)M$/){ # linear match
+	  $start += $1 - 1;
+	}
+	else{ # InDel read
+	  my $MD_count = 0;
+	  foreach (@comp_cigar){
+	    ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	  }
+	  $start += $MD_count - 1;
+	}
+      }
+      else{ ### vanilla format
+	$start += length($meth_call)-1;
+      }
+    }
+  }
+
+  ### THIS IS THE CpG and Non-CpG SECTION (OPTIONAL)
+
+  ### single-file CpG and other-context output
+  if ($full and $merge_non_CpG) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	
+	if ($cigar and @comp_cigar){ # only needed for SAM alignments with InDels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+	
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM entries with InDels
+	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.'){
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+
+  ### strand-specific methylation output
+  elsif ($merge_non_CpG) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+    	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+
+  ### THIS IS THE 3-CONTEXT (CpG, CHG and CHH) DEFAULT SECTION
+
+  elsif ($full) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+	
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The read had a strand orientation which was neither + nor -: $strand\n";
+    }
+  }
+
+  ### strand-specific methylation output
+  else {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+}
+
+
+
+#######################################################################################################################################
+### bismark2bedGaph section - START
+#######################################################################################################################################
+
+### has now been moved to the external script bismark2bedGraph
+
+# sub process_bedGraph_output{
+#   warn  "="x64,"\n";
+#   warn "Methylation information will now be written into a bedGraph file\n";
+#   warn  "="x64,"\n\n";
+#   sleep (2);
+
+#   ### Closing all filehandles so that the Bismark methtylation extractor output doesn't get truncated due to buffering issues
+#   foreach my $fh (keys %fhs) {
+#     if ($fh =~ /^[1230]$/) {
+#       foreach my $context (keys %{$fhs{$fh}}) {
+# 	close $fhs{$fh}->{$context} or die $!;
+#       }
+#     } else {
+#       close $fhs{$fh} or die $!;
+#     }
+#   }
+
+#   ### deciding which files to use for bedGraph conversion
+#   foreach my $filename (@sorting_files){
+#     # warn "$filename\n";
+#     if ($filename =~ /\//){ # if files are in a different output folder we extract the filename again
+#       $filename =~ s/.*\///; # replacing everything up to the last slash in the filename
+#       # warn "$filename\n";
+#     }
+
+#     if ($CX_context){
+#       push @bedfiles,$filename;
+#     }
+#     else{ ## CpG context only (default)
+#       if ($filename =~ /^CpG_/){
+# 	push @bedfiles,$filename;
+#       }
+#       else{
+# 	# skipping CHH or CHG files
+#       }
+#     }
+#   }
+
+#   warn "Using the following files as Input:\n";
+#   print join ("\t",@bedfiles),"\n\n";
+#   sleep (2);
+
+#   my %temp_fhs;
+#   my @temp_files; # writing all context files (default CpG only) to these files prior to sorting
+
+#   ### changing to the output directory
+#   unless ($output_dir eq ''){ # default
+#     chdir $output_dir or die "Failed to change directory to $output_dir\n";
+#     warn "Changed directory to $output_dir\n";
+#   }
+
+#   foreach my $infile (@bedfiles) {
+
+#     if ($remove) {
+#       warn "Now replacing whitespaces in the sequence ID field of the Bismark methylation extractor output $infile prior to bedGraph conversion\n\n";
+
+#       if ($infile =~ /gz$/){
+# 	open (READ,"zcat $infile |") or die $!;
+#       }
+#       else{
+# 	open (READ,$infile) or die $!;
+#       }
+
+#       my $removed_spaces_outfile = $infile;
+#       $removed_spaces_outfile =~ s/$/.spaces_removed.txt/;
+
+#       open (REM,'>',$output_dir.$removed_spaces_outfile) or die "Couldn't write to file $removed_spaces_outfile: $!\n";
+
+#       unless ($no_header){
+# 	$_ = <READ>;		### Bismark version header
+# 	print REM $_;		### Bismark version header
+#       }
+
+#       while (<READ>) {
+# 	chomp;
+# 	my ($id,$strand,$chr,$pos,$context) = (split (/\t/));
+# 	$id =~ s/\s+/_/g;
+# 	print REM join ("\t",$id,$strand,$chr,$pos,$context),"\n";
+#       }
+	
+#       close READ or die $!;
+#       close REM or die $!;
+
+#       ### changing the infile name to the new file without spaces
+#       $infile = $removed_spaces_outfile;
+#     }
+
+#     warn "Now writing methylation information for file $infile to individual files for each chromosome\n";
+#     if ($infile =~ /gz$/){
+#       open (IN,"zcat $infile |") or die $!;
+#     }
+#     else{
+#       open (IN,$infile) or die $!;
+#     }
+
+#     ## always ignoring the version header
+#     unless ($no_header){
+#       $_ = <IN>;		### Bismark version header
+#     }
+	
+#     while (<IN>) {
+#       chomp;
+#       my ($chr) = (split (/\t/))[2];
+#       # warn "This is the chromosome name before replacing '|' characters:\t$chr\n\n";
+#       $chr =~ s/\|/_/g; # replacing pipe ('|') characters in the file names
+#       # warn "This is the chromosome name AFTER replacing '|' characters:\t$chr\n\n";
+
+#       unless (exists $temp_fhs{$chr}) {
+# 	open ($temp_fhs{$chr},'>','chr'.$chr.'.meth_extractor.temp') or die "Failed to open filehandle: $!";
+#       }
+#       print {$temp_fhs{$chr}} "$_\n";
+#     }
+
+#     warn "Finished writing out individual chromosome files for $infile\n";
+#   }
+#   warn "\n";
+
+#   @temp_files = <*.meth_extractor.temp>;
+
+#   warn "Collecting temporary chromosome file information...\n";
+#   sleep (1);
+#   warn "processing the following input file(s):\n";
+#   warn join ("\n",@temp_files),"\n\n";
+#   sleep (1);
+
+#   foreach my $in (@temp_files) {
+#     if ($sort_size){
+#       warn "Sorting input file $in by positions (using -S of '$sort_size')\n";
+#     }
+#     else{
+#       warn "Sorting input file $in by positions (using default memory settings)\n";
+#     }
+#     my $sort_dir = $output_dir;
+#     if ($sort_dir eq ''){
+#       $sort_dir = './';
+#     }
+#     open my $ifh, "sort -S $sort_size -T $sort_dir -k3,3 -k4,4n $in |" or die "Input file could not be sorted. $!";
+#     # print "Chromosome\tStart Position\tEnd Position\tMethylation Percentage\n";
+	
+#     ############################################# m.a.bentley - moved the variables out of the while loop to hold the current line data {
+
+#     my $name;
+#     my $meth_state;
+#     my $chr = "";
+#     my $pos = 0;
+#     my $meth_state2;
+
+#     my $last_pos;
+#     my $last_chr;
+	
+#     #############################################  }
+	
+#     while (my $line = <$ifh>) {
+#       next if $line =~ /^Bismark/;
+#       chomp $line;
+
+#       ########################################### m.a.bentley - (1) set the last_chr and last_pos variables early in the while loop, before the line split (2) removed unnecessary setting of same variables in if statement {
+
+#       $last_chr = $chr;
+#       $last_pos = $pos;
+#       ($name, $meth_state, $chr, $pos, $meth_state2) = split "\t", $line;
+
+#       if (($last_pos ne $pos) || ($last_chr ne $chr)) {
+# 	generate_output($last_chr,$last_pos) if $methylcalls[2] > 0;
+# 	@methylcalls = qw (0 0 0);
+#       }
+
+#       #############################################  }
+	
+#       my $validated = validate_methylation_call($meth_state, $meth_state2);
+#       unless($validated){
+# 	warn "Methylation state of sequence ($name) in file ($in) on line $. is inconsistent (meth_state is $meth_state, meth_state2 = $meth_state2)\n";
+# 	next;
+#       }
+#       if ($meth_state eq "+") {
+# 	$methylcalls[0]++;
+# 	$methylcalls[2]++;
+#       } else {
+# 	$methylcalls[1]++;
+# 	$methylcalls[2]++;
+#       }
+#     }
+
+#     ############################################# m.a.bentley - set the last_chr and last_pos variables for the last line in the file (outside the while loop's scope using the method i've implemented) {
+	
+#     $last_chr = $chr;
+#     $last_pos = $pos;
+#     if ($methylcalls[2] > 0) {
+#       generate_output($last_chr,$last_pos) if $methylcalls[2] > 0;
+#     }
+#     #############################################  }
+	
+#     close $ifh or die $!;
+	
+#     @methylcalls = qw (0 0 0); # resetting @methylcalls
+
+#     ### deleting temporary files
+#     my $delete = unlink $in;
+#     if ($delete) {
+#       warn "Successfully deleted the temporary input file $in\n\n";
+#     }
+#     else {
+#       warn "The temporary inputfile $in could not be deleted $!\n\n";
+#     }
+#   }
+# }
+
+# sub generate_output{
+#   my $methcount = $methylcalls[0];
+#   my $nonmethcount = $methylcalls[1];
+#   my $totalcount = $methylcalls[2];
+#   my $last_chr = shift;
+#   my $last_pos = shift;
+#   croak "Should not be generating output if there's no reads to this region" unless $totalcount > 0;
+#   croak "Total counts ($totalcount) is not the sum of the methylated ($methcount) and unmethylated ($nonmethcount) counts" if $totalcount != ($methcount + $nonmethcount);
+
+#   ############################################# m.a.bentley - declare a new variable 'bed_pos' to distinguish from bismark positions (-1) - previous scripts modified the last_pos variable earlier in the script leading to problems in meth % calculation {
+
+#   my $bed_pos = $last_pos -1; ### Bismark coordinates are 1 based whereas bedGraph coordinates are 0 based.
+#   my $meth_percentage;
+#   ($totalcount >= $coverage_threshold) ? ($meth_percentage = ($methcount/$totalcount) * 100) : ($meth_percentage = undef);
+#   # $meth_percentage =~ s/(\.\d\d).+$/$1/ unless $meth_percentage =~ /^Below/;
+#   if (defined $meth_percentage){
+#     if ($counts){
+#       print OUT "$last_chr\t$bed_pos\t$bed_pos\t$meth_percentage\t$methcount\t$nonmethcount\n";
+#     }
+#     else{
+#       print OUT "$last_chr\t$bed_pos\t$bed_pos\t$meth_percentage\n";
+#     }
+#   }
+#   #############################################  }
+# }
+
+# sub validate_methylation_call{
+#   my $meth_state = shift;
+#   croak "Missing (+/-) methylation call" unless defined $meth_state;
+#   my $meth_state2 = shift;
+#   croak "Missing alphabetical methylation call" unless defined $meth_state2;
+#   my $is_consistent;
+#   ($meth_state2 =~ /^z/i) ? ($is_consistent = check_CpG_methylation_call($meth_state, $meth_state2)) 
+#                           : ($is_consistent = check_nonCpG_methylation_call($meth_state,$meth_state2));
+#   return 1 if $is_consistent;
+#   return 0;
+# }
+
+# sub check_CpG_methylation_call{
+#   my $meth1 = shift;
+#   my $meth2 = shift;
+#   return 1 if($meth1 eq "+" && $meth2 eq "Z");
+#   return 1 if($meth1 eq "-" && $meth2 eq "z");
+#   return 0;
+# }
+
+# sub check_nonCpG_methylation_call{
+#   my $meth1 = shift;
+#   my $meth2 = shift;
+#   return 1 if($meth1 eq "+" && $meth2 eq "C");
+#   return 1 if($meth1 eq "+" && $meth2 eq "X");
+#   return 1 if($meth1 eq "+" && $meth2 eq "H");
+#   return 1 if($meth1 eq "-" && $meth2 eq "c");
+#   return 1 if($meth1 eq "-" && $meth2 eq "x");
+#   return 1 if($meth1 eq "-" && $meth2 eq "h");
+#   return 0;
+# }
+
+#######################################################################################################################################
+### bismark2bedGaph section - END
+#######################################################################################################################################
+
+
+
+
+
+
+# #######################################################################################################################################
+# ### genome-wide cytosine methylation report - START
+# #######################################################################################################################################
+
+### has now been moved to the external script bedGraph2cytosine
+
+# sub generate_genome_wide_cytosine_report {
+
+#   warn  "="x78,"\n";
+#   warn "Methylation information will now be written into a genome-wide cytosine report\n";
+#   warn  "="x78,"\n\n";
+#   sleep (2);
+
+#   ### changing to the output directory again
+#   unless ($output_dir eq ''){ # default
+#     chdir $output_dir or die "Failed to change directory to $output_dir\n";
+#     # warn "Changed directory to $output_dir\n";
+#   }
+
+#   my $in = shift;
+#   open (IN,$in) or die $!;
+
+#   my $cytosine_out = shift;
+
+#   if ($CX_context){
+#     $cytosine_out =~ s/$/genome-wide_CX_report.txt/;
+#   }
+#   else{
+#     $cytosine_out =~ s/$/genome_wide_CpG_report.txt/;
+#   }
+
+#   ### note: we are still in the folder: $output_dir, so we do not have to include this into the open commands
+#   unless ($split_by_chromosome){ ### writing all output to a single file (default)
+#     open (CYT,'>',$cytosine_out) or die $!;
+#     warn "Writing genome-wide cytosine report to: $cytosine_out\n";
+#     sleep (3);
+#   }
+
+#   my $last_chr;
+#   my %chr; # storing reads for one chromosome at a time
+
+#   my $count = 0;
+#   while (<IN>){
+#     chomp;
+#     ++$count;
+#     my ($chr,$start,$end,undef,$meth,$nonmeth) = (split /\t/);
+
+#     # defining the first chromosome
+#     unless (defined $last_chr){
+#       $last_chr = $chr;
+#       # warn "Storing all covered cytosine positions for chromosome: $chr\n";
+#     }
+
+#     if ($chr eq $last_chr){
+#       $chr{$chr}->{$start}->{meth} = $meth;
+#       $chr{$chr}->{$start}->{nonmeth} = $nonmeth;
+#     }
+#     else{
+#       warn "Writing cytosine reports for chromosome $last_chr (stored ",scalar keys %{$chr{$last_chr}}," different covered positions)\n";
+
+#       if ($split_by_chromosome){ ## writing output to 1 file per chromosome
+# 	my $chromosome_out = $cytosine_out;
+# 	$chromosome_out =~ s/txt$/chr${last_chr}.txt/;
+#       open (CYT,'>',$chromosome_out) or die $!;
+#     }
+
+#       while ( $chromosomes{$last_chr} =~ /([CG])/g){
+	
+# 	my $tri_nt = '';
+# 	my $context = '';
+# 	my $pos = pos$chromosomes{$last_chr};
+	
+# 	my $strand;
+# 	my $meth = 0;
+# 	my $nonmeth = 0;
+	
+# 	if ($1 eq 'C'){    # C on forward strand
+# 	  $tri_nt = substr ($chromosomes{$last_chr},($pos-1),3);   # positions are 0-based!
+# 	  $strand = '+';
+# 	}
+# 	elsif ($1 eq 'G'){ # C on reverse strand
+# 	  $tri_nt = substr ($chromosomes{$last_chr},($pos-3),3);   # positions are 0-based!
+# 	  $tri_nt = reverse $tri_nt;
+# 	  $tri_nt =~ tr/ACTG/TGAC/;
+# 	  $strand = '-';
+# 	}
+# 	next if (length$tri_nt < 3); # trinucleotide sequence could not be extracted
+
+# 	if (exists $chr{$last_chr}->{($pos-1)}){ # stored positions are 0-based!
+# 	  $meth =  $chr{$last_chr}->{$pos-1}->{meth};
+# 	  $nonmeth = $chr{$last_chr}->{$pos-1}->{nonmeth};
+# 	}
+
+# 	### determining cytosine context	
+# 	if ($tri_nt =~ /^CG/){
+# 	  $context = 'CG';
+# 	}
+# 	elsif ($tri_nt =~ /^C.{1}G$/){
+# 	  $context = 'CHG';
+# 	}
+# 	elsif ($tri_nt =~ /^C.{2}$/){
+# 	  $context = 'CHH';
+# 	}
+# 	else{ # if the context can't be determined the positions will not be printed (it will equally not have been reported by Bismark)
+# 	  warn "The sequence context could not be determined (found: '$tri_nt'). Skipping.\n";
+# 	  next;
+# 	}
+
+# 	if ($CpG_only){
+# 	  if ($tri_nt =~ /^CG/){ # CpG context is the default
+# 	    if ($zero){ # zero based coordinates
+# 	      $pos -= 1;
+# 	      print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	    }
+# 	    else{ # default
+# 	      print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	    }
+# 	  }
+# 	}
+# 	else{ ## all cytosines, specified with --CX
+# 	  if ($zero){ # zero based coordinates
+# 	    $pos -= 1;
+# 	    print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	  }
+# 	  else{ # default
+# 	    print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	  }
+# 	}
+#       }
+
+#       %chr = (); # resetting the hash
+
+#       # new first entry
+#       $last_chr = $chr;
+#       $chr{$chr}->{$start}->{meth} = $meth;
+#       $chr{$chr}->{$start}->{nonmeth} = $nonmeth;
+#     }
+#   }
+
+#   # Last found chromosome
+# warn "Writing cytosine reports for chromosome $last_chr (stored ",scalar keys %{$chr{$last_chr}}," different covered positions)\n";
+
+# if ($split_by_chromosome){ ## writing output to 1 file per chromosome
+#   my $chromosome_out = $cytosine_out;
+#   $chromosome_out =~ s/txt$/chr${last_chr}.txt/;
+#   open (CYT,'>',$chromosome_out) or die $!;
+# }
+
+#   while ( $chromosomes{$last_chr} =~ /([CG])/g){
+	
+#     my $tri_nt;
+#     my $context;
+#     my $pos = pos$chromosomes{$last_chr};
+
+#     my $strand;
+#     my $meth = 0;
+#     my $nonmeth = 0;
+
+#     if ($1 eq 'C'){    # C on forward strand
+#       $tri_nt = substr ($chromosomes{$last_chr},($pos-1),3);   # positions are 0-based!
+#       $strand = '+';
+#     }
+#     elsif ($1 eq 'G'){ # C on reverse strand
+#       $tri_nt = substr ($chromosomes{$last_chr},($pos-3),3);   # positions are 0-based!
+#       $tri_nt = reverse $tri_nt;
+#       $tri_nt =~ tr/ACTG/TGAC/;
+#       $strand = '-';
+#     }
+
+#     if (exists $chr{$last_chr}->{($pos-1)}){ # stored positions are 0-based!
+#       $meth =  $chr{$last_chr}->{$pos-1}->{meth};
+#       $nonmeth = $chr{$last_chr}->{$pos-1}->{nonmeth};
+#     }
+
+#     next if (length$tri_nt < 3); # trinucleotide sequence could not be extracted
+
+#     ### determining cytosine context	
+#     if ($tri_nt =~ /^CG/){
+#       $context = 'CG';
+#     }
+#     elsif ($tri_nt =~ /^C.{1}G$/){
+#       $context = 'CHG';
+#     }
+#     elsif ($tri_nt =~ /^C.{2}$/){
+#       $context = 'CHH';
+#     }
+#     else{ # if the context can't be determined the positions will not be printed (it will equally not have been reported by Bismark)
+#       warn "The cytosine context could not be determined (found: '$tri_nt'). Skipping.\n";
+#       next;
+#     }
+	
+#     if ($CpG_only){
+#       if ($tri_nt =~ /^CG/){ # CpG context is the default
+# 	if ($zero){ # zero-based coordinates
+# 	  $pos -= 1;
+# 	  print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	}
+# 	else{ # default
+# 	  print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+# 	}
+#       }
+#     }
+#     else{ ## all cytosines, specified with --CX
+#       if ($zero){ # zero based coordinates
+# 	$pos -= 1;
+# 	print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+#       }
+#       else{ # default
+# 	print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
+#       }
+#     }
+#   }
+#   close CYT or die $!;
+# }
+
+
+# sub read_genome_into_memory{
+
+#   ## reading in and storing the specified genome in the %chromosomes hash
+#   chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
+#   warn "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";
+
+#   my @chromosome_filenames =  <*.fa>;
+
+#   ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta
+#   unless (@chromosome_filenames){
+#     @chromosome_filenames =  <*.fasta>;
+#   }
+#   unless (@chromosome_filenames){
+#     die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions)\n";
+#   }
+
+#   foreach my $chromosome_filename (@chromosome_filenames){
+
+#     # skipping the tophat entire mouse genome fasta file
+#     next if ($chromosome_filename eq 'Mus_musculus.NCBIM37.fa');
+
+#     open (CHR_IN,$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
+#     ### first line needs to be a fastA header
+#     my $first_line = <CHR_IN>;
+#     chomp $first_line;
+#     $first_line =~ s/\r//; # removing /r carriage returns
+
+#     ### Extracting chromosome name from the FastA header
+#     my $chromosome_name = extract_chromosome_name($first_line);
+	
+#     my $sequence;
+#     while (<CHR_IN>){
+#       chomp;
+#       $_ =~ s/\r//; # removing /r carriage returns
+
+#       if ($_ =~ /^>/){
+# 	### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
+# 	if (exists $chromosomes{$chromosome_name}){
+# 	  warn "chr $chromosome_name (",length $sequence ," bp)\n";
+# 	  die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
+# 	}
+# 	else {
+# 	  if (length($sequence) == 0){
+# 	    warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
+# 	  }
+# 	  warn "chr $chromosome_name (",length $sequence ," bp)\n";
+# 	  $chromosomes{$chromosome_name} = $sequence;
+# 	}
+# 	### resetting the sequence variable
+# 	$sequence = '';
+# 	### setting new chromosome name
+# 	$chromosome_name = extract_chromosome_name($_);
+#       }
+#       else{
+# 	$sequence .= uc$_;
+#       }
+#     }
+
+#     if (exists $chromosomes{$chromosome_name}){
+#       warn "chr $chromosome_name (",length $sequence ," bp)\t";
+#       die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
+#     }
+#     else{
+#       if (length($sequence) == 0){
+# 	warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
+#       }
+#       warn "chr $chromosome_name (",length $sequence ," bp)\n";
+#       $chromosomes{$chromosome_name} = $sequence;
+#     }
+#   }
+#   warn "\n";
+#   chdir $parent_dir or die "Failed to move to directory $parent_dir\n";
+# }
+
+# sub extract_chromosome_name {
+#   ## Bowtie extracts the first string after the inition > in the FASTA file, so we are doing this as well
+#   my $fasta_header = shift;
+#   if ($fasta_header =~ s/^>//){
+#     my ($chromosome_name) = split (/\s+/,$fasta_header);
+#     return $chromosome_name;
+#   }
+#   else{
+#     die "The specified chromosome ($fasta_header) file doesn't seem to be in FASTA format as required!\n";
+#   }
+# }
+
+# #######################################################################################################################################
+# ### genome-wide cytosine methylation report - END
+# #######################################################################################################################################
+
+
+
+
+sub print_helpfile{
+
+ print << 'HOW_TO';
+
+
+DESCRIPTION
+
+The following is a brief description of all options to control the Bismark
+methylation extractor. The script reads in a bisulfite read alignment results file 
+produced by the Bismark bisulfite mapper and extracts the methylation information
+for individual cytosines. This information is found in the methylation call field
+which can contain the following characters:
+
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       ~~~   X   for methylated C in CHG context (was protected)     ~~~
+       ~~~   x   for not methylated C CHG (was converted)            ~~~
+       ~~~   H   for methylated C in CHH context (was protected)     ~~~
+       ~~~   h   for not methylated C in CHH context (was converted) ~~~
+       ~~~   Z   for methylated C in CpG context (was protected)     ~~~
+       ~~~   z   for not methylated C in CpG context (was converted) ~~~
+       ~~~   .   for any bases not involving cytosines               ~~~
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
+context (this distinction is actually already made in Bismark itself). As the methylation
+information for every C analysed can produce files which easily have tens or even hundreds of
+millions of lines, file sizes can become very large and more difficult to handle. The C
+methylation info additionally splits cytosine methylation calls up into one of the four possible
+strands a given bisulfite read aligned against:
+
+             OT      original top strand
+             CTOT    complementary to original top strand
+
+             OB      original bottom strand
+             CTOB    complementary to original bottom strand
+
+Thus, by default twelve individual output files are being generated per input file (unless
+--comprehensive is specified, see below). The output files can be imported into a genome
+viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
+unless the bisulfite reads were generated preserving directionality it doesn't make any
+sense to look at the data in a strand-specific manner). Strand-specific output files can
+optionally be skipped, in which case only three output files for CpG, CHG or CHH context
+will be generated. For both the strand-specific and comprehensive outputs there is also
+the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
+
+
+The output files are in the following format (tab delimited):
+
+<sequence_id>     <strand>      <chromosome>     <position>     <methylation call>
+
+
+USAGE: methylation_extractor [options] <filenames>
+
+
+ARGUMENTS:
+
+<filenames>              A space-separated list of Bismark result files in SAM format from
+                         which methylation information is extracted for every cytosine in
+                         the reads. For alignment files in the older custom Bismark output
+                         see option '--vanilla'.
+
+OPTIONS:
+
+-s/--single-end          Input file(s) are Bismark result file(s) generated from single-end
+                         read data. Specifying either --single-end or --paired-end is
+                         mandatory.
+
+-p/--paired-end          Input file(s) are Bismark result file(s) generated from paired-end
+                         read data. Specifying either --paired-end or --single-end is
+                         mandatory.
+
+--vanilla                The Bismark result input file(s) are in the old custom Bismark format
+                         (up to version 0.5.x) and not in SAM format which is the default as
+                         of Bismark version 0.6.x or higher. Default: OFF.
+
+--no_overlap             For paired-end reads it is theoretically possible that read_1 and
+                         read_2 overlap. This option avoids scoring overlapping methylation
+                         calls twice (only methylation calls of read 1 are used for in the process
+                         since read 1 has historically higher quality basecalls than read 2).
+                         Whilst this option removes a bias towards more methylation calls
+                         in the center of sequenced fragments it may de facto remove a sizable
+                         proportion of the data. This option is highly recommended for paired-end
+                         data.
+
+--ignore <int>           Ignore the first <int> bp at the 5' end of each read when processing the
+                         methylation call string. This can remove e.g. a restriction enzyme site
+                         at the start of each read.
+
+--comprehensive          Specifying this option will merge all four possible strand-specific 
+                         methylation info into context-dependent output files. The default 
+                         contexts are:
+                          - CpG context
+                          - CHG context
+                          - CHH context
+
+--merge_non_CpG          This will produce two output files (in --comprehensive mode) or eight
+                         strand-specific output files (default) for Cs in
+                          - CpG context
+                          - non-CpG context
+
+--report                 Prints out a short methylation summary as well as the paramaters used to run
+                         this script.
+
+--no_header              Suppresses the Bismark version header line in all output files for more convenient
+                         batch processing.
+
+-o/--output DIR          Allows specification of a different output directory (absolute or relative
+                         path). If not specified explicitely, the output will be written to the current directory.
+
+--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
+                         explicitly if Samtools is in the PATH already.
+
+--gzip                   The methylation extractor files (CpG_OT_..., CpG_OB_... etc) will be written out in
+                         a GZIP compressed form to save disk space. This option does not work on bedGraph and
+                         genome-wide cytosine reports as they are 'tiny' anyway.
+
+--version                Displays version information.
+
+-h/--help                Displays this help file and exits.
+
+
+
+bedGraph specific options:
+
+--bedGraph               After finishing the methylation extraction, the methylation output is written into a
+                         sorted bedGraph file that reports the position of a given cytosine and its methylation 
+                         state (in %, see details below). The methylation extractor output is temporarily split up into
+                         temporary files, one per chromosome (written into the current directory or folder
+                         specified with -o/--output); these temp files are then used for sorting and deleted
+                         afterwards. By default, only cytosines in CpG context will be sorted. The option
+                         '--CX_context' may be used to report all cytosines irrespective of sequence context
+                         (this will take MUCH longer!). The default folder for temporary files during the sorting
+                         process is the output directory. The bedGraph conversion step is performed by the external
+                         module 'bismark2bedGraph'; this script needs to reside in the same folder as the 
+                         bismark_methylation_extractor itself.
+
+
+--cutoff [threshold]     The minimum number of times a methylation state has to be seen for that nucleotide
+                         before its methylation percentage is reported. Default: 1.
+
+--remove_spaces          Replaces whitespaces in the sequence ID field with underscores to allow sorting.
+
+
+--counts                 Adds two additional columns to the output file to enable further calculations:
+                             col 5: number of methylated calls
+                             col 6: number of unmethylated calls
+                         This option is required if '--cytosine_report' is specified (and will be set automatically if
+                         necessary).
+
+--CX/--CX_context        The sorted bedGraph output file contains information on every single cytosine that was covered
+                         in the experiment irrespective of its sequence context. This applies to both forward and
+                         reverse strands. Please be aware that this option may generate large temporary and output files
+                         and may take a long time to sort (up to many hours). Default: OFF.
+                         (i.e. Default = CpG context only).
+
+--buffer_size <string>   This allows you to specify the main memory sort buffer when sorting the methylation information.
+                         Either specify a percentage of physical memory by appending % (e.g. --buffer_size 50%) or
+			 a multiple of 1024 bytes, e.g. 'K' multiplies by 1024, 'M' by 1048576 and so on for 'T' etc. 
+                         (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line.
+                         Defaults to 2G.
+
+
+Genome-wide cytosine methylation report specific options:
+
+--cytosine_report        After the conversion to bedGraph has completed, the option '--cytosine_report' produces a
+                         genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based
+                         chromosome coordinates (zero-based cords are optional) and reports CpG context only (all
+                         cytosine context is optional). The output considers all Cs on both forward and reverse strands and
+                         reports their position, strand, trinucleotide content and methylation state (counts are 0 if not
+                         covered). The cytsoine report conversion step is performed by the external module 
+                         'bedGraph2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor
+                         itself.
+
+--CX/--CX_context        The output file contains information on every single cytosine in the genome irrespective of
+                         its context. This applies to both forward and reverse strands. Please be aware that this will
+                         generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.
+                         Default: OFF (i.e. Default = CpG context only).
+
+--zero_based             Uses zero-based coordinates like used in e.g. bed files instead of 1-based coordinates. Default: OFF.
+
+--genome_folder <path>   Enter the genome folder you wish to use to extract sequences from (full path only). Accepted
+                         formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.
+
+--split_by_chromosome    Writes the output into individual files for each chromosome instead of a single output file. Files
+                         will be named to include the input filename and the chromosome number.
+
+
+
+OUTPUT:
+
+The bismark_methylation_extractor output is in the form:
+========================================================
+<seq-ID>  <methylation state*>  <chromosome>  <start position (= end position)>  <methylation call>
+
+* Methylated cytosines receive a '+' orientation,
+* Unmethylated cytosines receive a '-' orientation.
+
+
+
+The bedGraph output (optional) looks like this (tab-delimited):
+===============================================================
+<chromosome>  <start position>  <end position>  <methylation percentage>
+
+The bedGraph output with '--counts' specified looks like this (tab-delimited):
+
+<chromosome>  <start position>  <end position>  <methylation percentage>  <count methylated>  <count non-methylated>
+
+
+
+The genome-wide cytosine methylation output file is tab-delimited in the following format:
+==========================================================================================
+<chromosome>  <position>  <strand>  <count methylated>  <count non-methylated>  <C-context>  <trinucleotide context>
+
+
+
+This script was last modified on 21 April 2013.
+
+HOW_TO
+}
author	bgruening
date	Sat, 06 Jul 2013 09:57:36 -0400
parents
children	91f07ff056ca