changeset 11:6479112a673b draft

Uploaded
author fcaramia
date Wed, 12 Dec 2012 19:46:06 -0500
parents 2432df265dad
children 109dcafe9aa6
files methylation_analysis_bismark/methylation_analysis/methylation_extractor
diffstat 1 files changed, 2570 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/methylation_analysis_bismark/methylation_analysis/methylation_extractor	Wed Dec 12 19:46:06 2012 -0500
@@ -0,0 +1,2570 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+$|++;
+use Getopt::Long;
+use Cwd;
+
+my @filenames;
+my %counting;
+my $parent_dir = getcwd();
+
+my %fhs;
+
+my $version = 'v0.7.6';
+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla) = process_commandline();
+
+process_Bismark_results_file();
+
+sub process_commandline{
+  my $help;
+  my $single_end;
+  my $paired_end;
+  my $ignore;
+  my $genomic_fasta;
+  my $full;
+  my $report;
+  my $extractor_version;
+  my $no_overlap;
+  my $merge_non_CpG;
+  my $vanilla;
+
+  my $command_line = GetOptions ('help|man' => \$help,
+				 'p|paired-end' => \$paired_end,
+				 's|single-end' => \$single_end,
+				 'fasta' => \$genomic_fasta,
+				 'ignore=i' => \$ignore,
+				 'comprehensive' => \$full,
+				 'report' => \$report,
+				 'version' => \$extractor_version,
+				 'no_overlap' => \$no_overlap,
+				 'merge_non_CpG' => \$merge_non_CpG, 
+				 'vanilla' => \$vanilla,
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+
+  if ($extractor_version){
+    print << "VERSION";
+
+
+                           Bismark Methylation Extractor
+
+   Bismark Extractor Version: $version Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
+                www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+
+VERSION
+    exit;
+  }
+
+
+  ### no files provided
+  unless (@ARGV){
+    die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
+  }
+  @filenames = @ARGV;
+
+  warn "\n *** Bismark methylation extractor version $version ***\n\n";
+
+  ### IGNORING <INT> bases at the start of the read when processing the methylation call string
+  unless ($ignore){
+    $ignore = 0;
+  }
+  ### PRINT A REPORT
+  unless ($report){
+    $report = 0;
+  }
+
+  ### OLD (VANILLA) OUTPUT FORMAT
+  unless ($vanilla){
+    $vanilla = 0;
+  }
+
+  if ($single_end){
+    $paired_end = 0;   ### SINGLE END ALIGNMENTS
+  }
+  elsif ($paired_end){
+    $single_end = 0;   ### PAIRED-END ALIGNMENTS
+  }
+  else{
+    die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
+  }
+
+  ### NO OVERLAP
+  if ($no_overlap){
+    die "The option '--no_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
+  }
+  else{
+    $no_overlap = 0;
+  }
+
+  ### COMPREHENSIVE OUTPUT
+  unless ($full){
+    $full = 0;
+  }
+
+  ### MERGE NON-CpG context
+  unless ($merge_non_CpG){
+    $merge_non_CpG = 0;
+  }
+
+  return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla);
+}
+
+
+sub process_Bismark_results_file{
+
+  if ($single){
+    if ($vanilla){
+      warn "Bismark Single-end vanilla format specified\n";
+    }
+    else{
+      warn "Bismark Single-end SAM format specified (default)\n"; # default
+    }
+  }
+  elsif ($paired){
+    if ($vanilla){
+      warn "Bismark Paired-end vanilla format specified\n";
+    }
+    else{
+      warn "Bismark Paired-end SAM format specified (default)\n"; # default
+    }
+  }
+
+  if ($ignore){
+    warn "First $ignore bases will be disregarded when processing the methylation call string\n";
+  }
+
+  if ($full){
+    warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n";
+  }
+  if ($merge_non_CpG){
+    warn "Merge CHG and CHH context to non-CpG context specified\n";
+  }
+  warn "\n";
+
+  sleep (3);
+
+  foreach my $filename (@filenames){
+    %fhs = ();
+    %counting =(
+		total_meCHG_count => 0,
+		total_meCHH_count => 0,
+		total_meCpG_count => 0,
+		total_unmethylated_CHG_count => 0,
+		total_unmethylated_CHH_count => 0,
+		total_unmethylated_CpG_count => 0,
+		sequences_count => 0,
+	       );
+    warn "\nNow reading in Bismark result file $filename\n\n";
+
+    if ($filename =~ /\.gz$/){
+      open (IN,"zcat $filename |") or die "Can't open gzipped file $filename: $!\n";
+    }
+    else{
+      open (IN,$filename) or die "Can't open file $filename: $!\n";
+    }
+
+    ### Vanilla and SAM output need to read different numbers of header lines
+    if ($vanilla){
+      my $bismark_version = <IN>; ## discarding the Bismark version info
+      chomp $bismark_version;
+      $bismark_version =~ s/\r//; # replaces \r line feed
+      $bismark_version =~  s/Bismark version: //;
+      if ($bismark_version =~ /^\@/){
+	warn "Detected \@ as the first character of the version information. Is it possible that the file is in SAM format?\n\n";
+	sleep (2);
+      }
+
+      unless ($version eq $bismark_version){
+	die "The methylation extractor and Bismark itself need to be of the same version!\n\nVersions used:\nmethylation extractor: '$version'\nBismark: '$bismark_version'\n";
+      }
+    }
+    else{
+      # If the read is in SAM format (default) it can either start with @ header lines or start with alignments directly.
+      # We are reading from it further down
+    }
+
+    my $output_filename = (split (/\//,$filename))[-1];
+
+    ### OPENING OUTPUT-FILEHANDLES
+    if ($report){
+      my $report_filename = $output_filename;
+      $report_filename =~ s/$/_splitting_report.txt/;
+      open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
+    }
+
+    if ($report){
+      print REPORT "$output_filename\n\n";
+      print REPORT "Parameters used to extract methylation information:\n";
+      if ($paired){
+	if ($vanilla){
+	  print REPORT "Bismark result file: paired-end (vanilla Bismark format)\n";
+	}
+	else{
+	  print REPORT "Bismark result file: paired-end (SAM format)\n"; # default
+	}
+      }
+
+      if ($single){
+	if ($vanilla){
+	  print REPORT "Bismark result file: single-end (vanilla Bismark format)\n";
+	}
+	else{
+	  print REPORT "Bismark result file: single-end (SAM format)\n"; # default
+	}
+      }
+
+      if ($ignore){
+	print REPORT "Ignoring first $ignore bases\n";
+      }
+
+      if ($full){
+	print REPORT "Output specified: comprehensive\n";
+      }
+      else{
+	print REPORT "Output specified: strand-specific (default)\n";
+      }
+
+      if ($no_overlap){
+	print REPORT "No overlapping methylation calls specified\n";
+      }
+      if ($genomic_fasta){
+	print REPORT "Genomic equivalent sequences will be printed out in FastA format\n";
+      }
+      if ($merge_non_CpG){
+	print REPORT "Methylation in CHG and CHH context will be merged into \"non-CpG context\" output\n";
+      }
+
+      print REPORT "\n";
+    }
+
+    ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
+
+    ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
+    if ($full and $merge_non_CpG){
+      my $cpg_output = my $other_c_output = $output_filename;
+      ### C in CpG context
+      $cpg_output =~ s/^/CpG_context_/;
+      $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
+      print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+
+      ### C in any other context than CpG
+      $other_c_output =~ s/^/Non_CpG_context_/;
+      $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
+      open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n";
+      print "Writing result file containing methylation information for C in any other context to $other_c_output\n";
+      print {$fhs{other_context}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### if only --merge_non_CpG was specified we will write out 8 different output files, depending on where the (first) unique best alignment has been found
+    elsif ($merge_non_CpG){
+
+      my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
+
+      ### For cytosines in CpG context
+      $cpg_ot =~ s/^/CpG_OT_/;
+      $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+
+      $cpg_ctot =~ s/^/CpG_CTOT_/;
+      $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+
+      $cpg_ctob =~ s/^/CpG_CTOB_/;
+      $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+
+      $cpg_ob =~ s/^/CpG_OB_/;
+      $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+
+      ### For cytosines in Non-CpG (CC, CT or CA) context
+      my $other_c_ot = my $other_c_ctot = my $other_c_ctob = my $other_c_ob = $output_filename;
+
+      $other_c_ot =~ s/^/Non_CpG_OT_/;
+      $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
+      open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n";
+      print "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n";
+      print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n";
+
+      $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
+      $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
+      open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n";
+      print "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n";
+      print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n";
+
+      $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
+      $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
+      open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n";
+      print "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n";
+      print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n";
+
+      $other_c_ob =~ s/^/Non_CpG_OB_/;
+      $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
+      open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n";
+      print "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n";
+      print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n";
+    }
+
+    ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
+
+    ### if --comprehensive was specified we are only writing one file per context
+    elsif ($full){
+      my $cpg_output = my $chg_output =  my $chh_output = $output_filename;
+      ### C in CpG context
+      $cpg_output =~ s/^/CpG_context_/;
+      $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
+      print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+
+      ### C in CHG context
+      $chg_output =~ s/^/CHG_context_/;
+      $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
+      open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n";
+      print "Writing result file containing methylation information for C in CHG context to $chg_output\n";
+      print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n";
+
+      ### C in CHH context
+      $chh_output =~ s/^/CHH_context_/;
+      $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
+      open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n";
+      print "Writing result file containing methylation information for C in CHH context to $chh_output\n";
+      print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n";   }
+
+    ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
+    else{
+      my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
+
+      ### For cytosines in CpG context
+      $cpg_ot =~ s/^/CpG_OT_/;
+      $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+
+      $cpg_ctot =~ s/^/CpG_CTOT_/;
+      $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+
+      $cpg_ctob =~ s/^/CpG_CTOB_/;
+      $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+
+      $cpg_ob =~ s/^/CpG_OB_/;
+      $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
+      print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+
+      ### For cytosines in CHG context
+      my $chg_ot = my $chg_ctot = my $chg_ctob = my $chg_ob = $output_filename;
+
+      $chg_ot =~ s/^/CHG_OT_/;
+      $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
+      open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n";
+      print "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n";
+      print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n";
+
+      $chg_ctot =~ s/^/CHG_CTOT_/;
+      $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
+      open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n";
+      print "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n";
+      print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n";
+
+      $chg_ctob =~ s/^/CHG_CTOB_/;
+      $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
+      open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n";
+      print "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n";
+      print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n";
+
+      $chg_ob =~ s/^/CHG_OB_/;
+      $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
+      open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n";
+      print "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n";
+      print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n";
+
+      ### For cytosines in CHH context
+      my $chh_ot = my $chh_ctot = my $chh_ctob = my $chh_ob = $output_filename;
+
+      $chh_ot =~ s/^/CHH_OT_/;
+      $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
+      open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n";
+      print "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n";
+      print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n";
+
+      $chh_ctot =~ s/^/CHH_CTOT_/;
+      $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
+      open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n";
+      print "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n";
+      print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n";
+
+      $chh_ctob =~ s/^/CHH_CTOB_/;
+      $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
+      open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n";
+      print "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n";
+      print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n";
+
+      $chh_ob =~ s/^/CHH_OB_/;
+      $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
+      open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n";
+      print "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n";
+      print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n";
+    }
+
+    my $methylation_call_strings_processed = 0;
+    my $line_count = 0;
+
+    ### proceeding differently now for single-end or paired-end Bismark files
+
+    ### PROCESSING SINGLE-END RESULT FILES
+    if ($single){
+
+      ### also proceeding differently now for SAM format or vanilla Bismark format files
+      if ($vanilla){ # old vanilla Bismark output format
+	while (<IN>){
+	  ++$line_count;
+	  warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	  ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
+	  my ($id,$strand,$chrom,$start,$seq,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,6,7,8,9];
+	
+	  ### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the first or
+	  ### last position
+	  chomp $genome_conversion;
+
+	  my $index;
+	  if ($meth_call){
+
+	    if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
+	      $index = 0;
+	    }	
+	    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
+	      $index = 1;
+	    }
+	    elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
+	      $index = 3;
+	    }
+	    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
+	      $index = 2;
+	    }
+	    else {
+	      die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
+	    }
+	
+	    ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
+	    if ($ignore){
+	      $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);	
+	
+	      ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
+	      if ($strand eq '+'){
+		$start += $ignore;
+	      }
+	      elsif ($strand eq '-'){
+		$start += length($meth_call)-1; ## $meth_call is already shortened!
+	      }
+	      else {
+		die "Alignment did not have proper strand information: $strand\n";
+	      }
+	    }
+	    ### printing out the methylation state of every C in the read
+	    print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
+	
+	    ++$methylation_call_strings_processed; # 1 per single-end result
+	  }
+	}
+      }
+      else{ # processing single-end SAM format (default)
+	while (<IN>){
+	  ### SAM format can either start with header lines (starting with @) or start with alignments directly
+	  if (/^\@/){ # skipping header lines (starting with @)
+	    warn "skipping SAM header line:\t$_";
+	    next;
+	  }
+
+	  ++$line_count;
+	  warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	  # example read in SAM format
+	  # 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
+	  ###
+
+	  # < 0.7.6 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+	  # < 0.7.6 $meth_call =~ s/^XM:Z://;
+	  # < 0.7.6 $read_conversion =~ s/^XR:Z://;
+	  # < 0.7.6 $genome_conversion =~ s/^XG:Z://;	
+
+	  my ($id,$chrom,$start,$cigar) = (split("\t"))[0,2,3,5];
+
+	  ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+	  my $meth_call;  ### Thanks to Zachary Zeno for this solution
+	  my $read_conversion;
+	  my $genome_conversion;
+	
+	  while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
+	    my $tag = $1;
+	    my $value = $2;
+
+	    if ($tag eq "XM"){
+	      $meth_call = $value;
+	      $meth_call =~ s/\r//;
+	    }
+	    elsif ($tag eq "XR") {
+	      $read_conversion = $value;
+	      $read_conversion =~ s/\r//;
+	    }
+	    elsif ($tag eq "XG") {
+	      $genome_conversion = $value;
+	      $genome_conversion =~ s/\r//;
+	    }
+	  }
+
+	  my $strand;
+	  chomp $genome_conversion;
+	  # print "$meth_call\n$read_conversion\n$genome_conversion\n";
+	
+	  my $index;
+	  if ($meth_call){
+	    if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
+	      $index = 0;
+	      $strand = '+';
+	    }	
+	    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
+	      $index = 1;
+	      $strand = '-';
+	    }
+	    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
+	      $index = 2;
+	      $strand = '+';
+	    }
+	    elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
+	      $index = 3;
+	      $strand = '-';
+	    }
+	    else {
+	      die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
+	    }
+	
+	    ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
+	    if ($strand eq '-'){
+	      $meth_call = reverse $meth_call;
+	    }
+	
+	    ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
+	    if ($ignore){
+	      # print "\n\n$meth_call\n";
+	      $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);	
+	      # print "$meth_call\n";
+	      ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
+
+	      my @len = split (/\D+/,$cigar); # storing the length per operation
+	      my @ops = split (/\d+/,$cigar); # storing the operation
+	      shift @ops; # remove the empty first element
+	      die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+		
+	      my @comp_cigar; # building an array with all CIGAR operations
+	      foreach my $index (0..$#len){
+		foreach (1..$len[$index]){
+		  # print  "$ops[$index]";
+		  push @comp_cigar, $ops[$index];
+		}
+	      }
+	      # print "original CIGAR: $cigar\n";
+	      # print "original CIGAR: @comp_cigar\n";
+
+	      ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
+	      if ($strand eq '+'){
+	
+		my $D_count = 0; # counting all deletions that affect the ignored genomic position, i.e. Deletions and insertions
+		my $I_count = 0;
+
+		for (1..$ignore){
+		  my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations from the start
+		  # print "$_ deleted $op\n";
+
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $D_count++;
+		    $op = shift @comp_cigar;
+		    # print "$_ deleted $op\n";
+		  }
+		  if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
+		    $I_count++;
+		  }
+		}
+		$start += $ignore + $D_count - $I_count;
+		# print "start $start\t ignore: $ignore\t D count: $D_count I_count: $I_count\n";
+	      }
+	      elsif ($strand eq '-'){
+
+		for (1..$ignore){
+		  my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $op = pop @comp_cigar;
+		  }
+		}
+
+		### For reverse strand alignments we need to determine the number of matching bases (M) or deletions (D) in the read from the CIGAR
+		### string to be able to work out the starting position of the read which is on the 3' end of the sequence
+ 		my $MD_count = 0; # counting all operations that affect the genomic position, i.e. M and D. Insertions do not affect the start position
+		foreach (@comp_cigar){
+		  ++$MD_count if ($_ eq 'M' or $_ eq 'D');
+		}
+		$start += $MD_count - 1;
+	      }
+	
+	      ### reconstituting shortened CIGAR string
+	      my $new_cigar;
+	      my $count = 0;
+	      my $last_op;
+	      # print "ignore adjusted: @comp_cigar\n";
+	      foreach my $op (@comp_cigar){
+		unless (defined $last_op){
+		  $last_op = $op;
+		  ++$count;
+		  next;
+		}
+		if ($last_op eq $op){
+		  ++$count;
+		}
+		else{
+		  $new_cigar .= "$count$last_op";
+		  $last_op = $op;
+		  $count = 1;
+		}
+	      }
+	      $new_cigar .= "$count$last_op"; # appending the last operation and count
+	      $cigar = $new_cigar;
+	      # print "ignore adjusted scalar: $cigar\n";
+	    }
+	  }
+	  ### printing out the methylation state of every C in the read
+	  print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index,$cigar);
+	
+	  ++$methylation_call_strings_processed; # 1 per single-end result
+	}
+      }
+    }
+
+    ### PROCESSING PAIRED-END RESULT FILES
+    elsif ($paired){
+
+      ### proceeding differently now for SAM format or vanilla Bismark format files
+      if ($vanilla){ # old vanilla Bismark paired-end output format
+	while (<IN>){
+	  ++$line_count;
+	  warn "processed line: $line_count\n" if ($line_count%500000==0);
+
+	  ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
+	  my ($id,$strand,$chrom,$start_read_1,$end_read_2,$seq_1,$meth_call_1,$seq_2,$meth_call_2,$first_read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,4,6,7,9,10,11,12,13];
+
+	  my $index;
+	  chomp $genome_conversion;
+	
+	  if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
+	    $index = 0; ## this is OT
+	  }	
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
+	    $index = 2; ## this is CTOB!!!
+	  }
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
+	    $index = 1; ## this is CTOT!!!
+	  }
+	  elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
+	    $index = 3; ## this is OB
+	  }
+	  else {
+	    die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
+	  }
+	
+	  if ($meth_call_1 and $meth_call_2){
+	    ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
+	    if ($ignore){
+	      $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
+	      $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
+
+	      ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore' was specified
+	      $start_read_1 += $ignore;
+	      $end_read_2   -= $ignore;
+	    }
+	    my $end_read_1;
+	    my $start_read_2;
+
+	    if ($strand eq '+'){
+
+	      $end_read_1 = $start_read_1+length($meth_call_1)-1;
+	      $start_read_2 = $end_read_2-length($meth_call_2)+1;
+		
+	      ## we first pass the first read which is in + orientation on the forward strand
+	      print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0);
+	
+	      # we next pass the second read which is in - orientation on the reverse strand
+	      ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
+	      print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1);
+	    }
+	
+	    else{
+	
+	      $end_read_1 = $start_read_1+length($meth_call_2)-1; # read 1 is the second reported read!
+	      $start_read_2 = $end_read_2-length($meth_call_1)+1; # read 2 is the first reported read!
+
+	      ## we first pass the first read which is in - orientation on the reverse strand
+	      print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0);
+
+	      # we next pass the second read which is in + orientation on the forward strand
+	      ### if --no_overlap was specified we also pass the end of read 2. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
+	      print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2);
+	    }
+	
+	    $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
+	  }	
+	}
+      }
+      else{ # Bismark paired-end SAM output format (default)
+	while (<IN>){
+	  ### SAM format can either start with header lines (starting with @) or start with alignments directly
+	  if (/^\@/){ # skipping header lines (starting with @)
+	    warn "skipping SAM header line:\t$_";
+	    next;
+	  }
+	
+	  ++$line_count;
+	  warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+	  # example paired-end reads in SAM format (2 consecutive lines)
+	  # 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
+	  # 1_R1/2	131	5	103172417	255	40M	=	103172224	-233	TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:6	XX:Z:T5T1T9T9T7T3	XM:Z:h.....h.h.........h.........h.......h...	XR:Z:GA	XG:Z:CT
+	
+	  #  < version 0.7.6 my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+
+	  my ($id_1,$chrom,$start_read_1,$cigar_1) = (split("\t"))[0,2,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+	  my $meth_call_1;
+	  my $first_read_conversion;
+	  my $genome_conversion;
+	
+	  while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
+	    my $tag = $1;
+	    my $value = $2;
+
+	    if ($tag eq "XM"){
+	      $meth_call_1 = $value;
+	      $meth_call_1 =~ s/\r//;
+	    }
+	    elsif ($tag eq "XR") {
+	      $first_read_conversion = $value;
+	      $first_read_conversion =~ s/\r//;
+	    }
+	    elsif ($tag eq "XG") {
+	      $genome_conversion = $value;
+	      $genome_conversion =~ s/\r//;
+	    }
+	  }
+
+	  $_ = <IN>; # reading in the paired read
+
+	  # < version 0.7.6 my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
+	  # < version 0.7.6 $meth_call_1 =~ s/^XM:Z://;
+	  # < version 0.7.6 $meth_call_2 =~ s/^XM:Z://;
+	  # < version 0.7.6 $first_read_conversion =~ s/^XR:Z://;
+	  # < version 0.7.6 $second_read_conversion =~ s/^XR:Z://;
+
+	  my ($id_2,$start_read_2,$cigar_2) = (split("\t"))[0,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
+
+	  my $meth_call_2;
+	  my $second_read_conversion;
+	
+	  while ( /(XM|XR):Z:([^\t]+)/g ) {
+	    my $tag = $1;
+	    my $value = $2;
+
+	    if ($tag eq "XM"){
+	      $meth_call_2 = $value;
+	      $meth_call_2 =~ s/\r//;
+	    }
+	    elsif ($tag eq "XR") {
+	      $second_read_conversion = $value;
+	      $second_read_conversion = s/\r//;
+	    }
+	  }
+	
+	  # < version 0.7.6 $genome_conversion =~ s/^XG:Z://;	
+	  chomp $genome_conversion; # in case it captured a new line character	
+
+	  # print join ("\t",$meth_call_1,$meth_call_2,$first_read_conversion,$second_read_conversion,$genome_conversion),"\n";
+
+	  my $index;
+	  my $strand;
+
+	  if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
+	    $index = 0; ## this is OT
+	    $strand = '+';
+	  }	
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
+	    $index = 1; ## this is CTOT
+	    $strand = '-';
+	  }
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
+	    $index = 2; ## this is CTOB
+	    $strand = '+';
+	  }
+	  elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
+	    $index = 3; ## this is OB
+	    $strand = '-';
+	  }
+	  else {
+	    die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
+	  }
+
+	  ### reversing the methylation call of the read that was reverse-complemented
+	  if ($strand eq '+'){
+	    $meth_call_2 = reverse $meth_call_2;
+	  }
+	  else{
+	    $meth_call_1 = reverse $meth_call_1;
+	  }
+
+	  if ($meth_call_1 and $meth_call_2){
+
+	    my $end_read_1;
+	
+	    ### READ 1
+	    my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
+	    my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
+	    shift @ops_1; # remove the empty first element
+	    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
+	
+	    my @comp_cigar_1; # building an array with all CIGAR operations
+	    foreach my $index (0..$#len_1){
+	      foreach (1..$len_1[$index]){
+		# print  "$ops_1[$index]";
+		push @comp_cigar_1, $ops_1[$index];
+	      }
+	    }
+	    # print "original CIGAR read 1: $cigar_1\n";
+	    # print "original CIGAR read 1: @comp_cigar_1\n";
+
+	    ### READ 2
+	    my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
+	    my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
+	    shift @ops_2; # remove the empty first element
+	    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
+	    my @comp_cigar_2; # building an array with all CIGAR operations for read 2
+	    foreach my $index (0..$#len_2){
+	      foreach (1..$len_2[$index]){
+		# print  "$ops_2[$index]";
+		push @comp_cigar_2, $ops_2[$index];
+	      }
+	    }
+	    # print "original CIGAR read 2: $cigar_2\n";
+	    # print "original CIGAR read 2: @comp_cigar_2\n";
+	
+	    if ($ignore){
+	      ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'	
+	      ### the methylation calls have already been reversed where necessary
+	      $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
+	      $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
+
+	      ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
+
+	      if ($strand eq '+'){
+		
+		### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
+		my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1, i.e. Deletions and insertions
+		my $I_count_1 = 0;
+
+		for (1..$ignore){
+		  my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
+		  # print "$_ deleted $op\n";
+		
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $D_count_1++;
+		    $op = shift @comp_cigar_1;
+		    # print "$_ deleted $op\n";
+		  }
+		  if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
+		    $I_count_1++;
+		  }
+		}
+		
+		$start_read_1 += $ignore + $D_count_1 - $I_count_1;
+		# print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tI_count 1: $I_count_1\n";
+	
+		### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
+
+		for (1..$ignore){
+		  my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $op = pop @comp_cigar_2;
+		  }
+		}
+		# the start position of reads mapping to the reverse strand is being adjusted further below
+	      }
+
+	      elsif ($strand eq '-'){
+	
+		### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
+		for (1..$ignore){
+		  my $op = pop @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $op = pop @comp_cigar_1;
+		  }
+		}
+		# the start position of reads mapping to the reverse strand is being adjusted further below
+
+		### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
+		my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
+		my $I_count_2 = 0;
+		
+		for (1..$ignore){
+		  my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
+		  # print "$_ deleted $op\n";
+		
+		  while ($op eq 'D'){ # repeating this for deletions (D)
+		    $D_count_2++;
+		    $op = shift @comp_cigar_2;
+		    # print "$_ deleted $op\n";
+		  }
+		  if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
+		    $I_count_2++;
+		  }
+		}
+		
+		$start_read_2 += $ignore + $D_count_2 - $I_count_2;
+		# print "start read 2 $start_read_2\t ignore: $ignore\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
+	
+	      }
+	
+	      ### reconstituting shortened CIGAR string 1
+	      my $new_cigar_1;
+	      my $count_1 = 0;
+	      my $last_op_1;
+	      # print "ignore adjusted CIGAR 1: @comp_cigar_1\n";
+	      foreach my $op (@comp_cigar_1){
+		unless (defined $last_op_1){
+		  $last_op_1 = $op;
+		  ++$count_1;
+		  next;
+		}
+		if ($last_op_1 eq $op){
+		  ++$count_1;
+		}
+		else{
+		  $new_cigar_1 .= "$count_1$last_op_1";
+		  $last_op_1 = $op;
+		  $count_1 = 1;
+		}
+	      }
+	      $new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
+	      $cigar_1 = $new_cigar_1;
+	      # print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
+
+	      ### reconstituting shortened CIGAR string 2
+	      my $new_cigar_2;
+	      my $count_2 = 0;
+	      my $last_op_2;
+	      # print "ignore adjusted CIGAR 2: @comp_cigar_2\n";
+	      foreach my $op (@comp_cigar_2){
+		unless (defined $last_op_2){
+		  $last_op_2 = $op;
+		  ++$count_2;
+		  next;
+		}
+		if ($last_op_2 eq $op){
+		  ++$count_2;
+		}
+		else{
+		  $new_cigar_2 .= "$count_2$last_op_2";
+		  $last_op_2 = $op;
+		  $count_2 = 1;
+		}
+	      }
+	      $new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
+	      $cigar_2 = $new_cigar_2;
+	      # print "ignore adjusted CIGAR 2 scalar: $cigar_2\n";
+
+	    }
+	
+	    if ($strand eq '+'){
+	      ### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
+	      @comp_cigar_2  = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+	      # print "reverse: @comp_cigar_2\n";
+	
+	      my $MD_count_1 = 0;
+	      foreach (@comp_cigar_1){
+		++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	      }
+
+	      my $MD_count_2 = 0;
+	      foreach (@comp_cigar_2){
+		++$MD_count_2 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	      }
+
+	      $end_read_1 = $start_read_1 + $MD_count_1 - 1;
+	      $start_read_2 += $MD_count_2 - 1; ## Passing on the start position on the reverse strand
+	    }
+	    else{
+	      ### adjusting the start position for all reads mapping to the reverse strand, in this case read 1
+	
+	      @comp_cigar_1  = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+	      # print "reverse: @comp_cigar_1\n";
+
+	      my $MD_count_1 = 0;
+	      foreach (@comp_cigar_1){
+		++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	      }
+
+	      $end_read_1 = $start_read_1;	
+	      $start_read_1 +=  $MD_count_1 - 1; ### Passing on the start position on the reverse strand
+
+	    }
+
+	    if ($strand eq '+'){
+	      ## we first pass the first read which is in + orientation on the forward strand
+	      print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1);
+	
+	      # we next pass the second read which is in - orientation on the reverse strand
+	      ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
+	      print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2);
+	    }
+	
+	    else{
+	      ## we first pass the first read which is in - orientation on the reverse strand
+	      print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1);
+	
+	      # we next pass the second read which is in + orientation on the forward strand
+	      ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
+	      print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2);
+	    }
+	
+	    $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
+	  }	
+	}
+      }
+    }
+    else{
+      die "Single-end or paired-end reads not specified properly\n";
+    }
+
+    print "\n\nProcessed $line_count lines from $filename in total\n";
+    print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+    if ($report){
+      print REPORT "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+    }
+    print_splitting_report ();
+  }
+}
+
+
+sub print_splitting_report{
+
+  ### Calculating methylation percentages if applicable
+
+  my $percent_meCpG;
+  if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
+    $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
+  }
+
+  my $percent_meCHG;
+  if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
+    $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
+  }
+
+  my $percent_meCHH;
+  if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
+    $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
+  }
+
+  my $percent_non_CpG_methylation;
+  if ($merge_non_CpG){
+    if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
+      $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
+    }
+  }
+
+  if ($report){
+    ### detailed information about Cs analysed
+    print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
+
+    my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+    print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
+
+    print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+    print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+    print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+
+    print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+    print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+    print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+
+    ### calculating methylated CpG percentage if applicable
+    if ($percent_meCpG){
+      print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
+    }
+    else{
+      print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+    }
+
+    ### 2-Context Output
+    if ($merge_non_CpG){
+      if ($percent_non_CpG_methylation){
+	print REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
+      }
+    }
+
+    ### 3 Context Output
+    else{
+      ### calculating methylated CHG percentage if applicable
+      if ($percent_meCHG){
+	print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+      }
+
+      ### calculating methylated CHH percentage if applicable
+      if ($percent_meCHH){
+	print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+      }
+      else{
+	print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+      }
+    }
+  }
+
+  ### detailed information about Cs analysed for on-screen report
+  print "Final Cytosine Methylation Report\n",'='x33,"\n";
+
+  my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+  print "Total number of C's analysed:\t$total_number_of_C\n\n";
+
+  print "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+  print "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+  print "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+
+  print "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  print "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  print "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+
+  ### printing methylated CpG percentage if applicable
+  if ($percent_meCpG){
+    print "C methylated in CpG context:\t${percent_meCpG}%\n";
+  }
+  else{
+    print "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+  }
+
+  ### 2-Context Output
+  if ($merge_non_CpG){
+    if ($percent_non_CpG_methylation){
+      print "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
+    }
+  }
+
+  ### 3-Context Output
+  else{
+    ### printing methylated CHG percentage if applicable
+    if ($percent_meCHG){
+      print "C methylated in CHG context:\t${percent_meCHG}%\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+    }
+
+    ### printing methylated CHH percentage if applicable
+    if ($percent_meCHH){
+      print "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+    }
+    else{
+      print "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+    }
+  }
+}
+
+
+
+
+
+sub print_individual_C_methylation_states_paired_end_files{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  my $methyl_CHG_count = 0;
+  my $methyl_CHH_count = 0;
+  my $methyl_CpG_count = 0;
+  my $unmethylated_CHG_count = 0;
+  my $unmethylated_CHH_count = 0;
+  my $unmethylated_CpG_count = 0;
+
+  my @len;
+  my @ops;
+  my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
+  my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
+  my @comp_cigar;
+
+  if ($cigar){ # parsing CIGAR string
+    @len = split (/\D+/,$cigar); # storing the length per operation
+    @ops = split (/\d+/,$cigar); # storing the operation
+    shift @ops; # remove the empty first element
+    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+    foreach my $index (0..$#len){
+      foreach (1..$len[$index]){
+	# print  "$ops[$index]";
+	push @comp_cigar, $ops[$index];
+      }
+    }
+    # warn "\nDetected CIGAR string: $cigar\n";
+    # warn "Length of methylation call: ",length $meth_call,"\n";
+    # warn "number of operations: ",scalar @ops,"\n";
+    # warn "number of length digits: ",scalar @len,"\n\n";
+    # print @comp_cigar,"\n";
+    # print "$meth_call\n\n";
+    # sleep (1);
+  }
+
+
+  if ($strand eq '-') {
+
+    ### the  CIGAR string needs to be reversed, the methylation call has already been reversed above
+    @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+    #  print "reverse CIGAR string: @comp_cigar\n";
+
+    ### the start position of paired-end files has already been corrected, see above
+  }
+
+  ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified
+
+  if ($merge_non_CpG) {
+
+    if ($no_overlap) {
+
+      ### single-file CpG and non-CpG context output
+      if ($full) {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    if ($cigar){ # only needed for SAM files
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start+$index+$pos_offset >= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.'){}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    if ($cigar){ # only needed for SAM files
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start-$index+$pos_offset <= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The read orientation was neither + nor -: '$strand'\n";
+	}
+      }
+
+      ### strand-specific methylation output
+      else {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start+$index+$pos_offset >= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }	
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    ### Returning as soon as the methylation calls start overlapping
+	    if ($start-$index+$pos_offset <= $end_read_1) {
+	      return;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation was neither + nor -: '$strand'/n";
+	}
+      }
+    }
+
+    ### this is the default paired-end procedure allowing overlaps and using every single C position
+    ### Still within the 2-CONTEXT ONLY optional section
+    else {
+      ### single-file CpG and non-CpG context output
+      if ($full) {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation as neither + nor -: '$strand'\n";
+	}
+      }
+
+      ### strand-specific methylation output
+      ### still within the 2-CONTEXT optional section
+      else {
+	if ($strand eq '+') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+
+	    if ($cigar){ # only needed for SAM files
+	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	      $cigar_offset += $cigar_mod;
+	      $pos_offset += $pos_mod;
+	    }
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      $counting{total_meCHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      $counting{total_unmethylated_CHG_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      $counting{total_meCpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      $counting{total_unmethylated_CpG_count}++;
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      $counting{total_meCHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      $counting{total_unmethylated_CHH_count}++;
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "The strand orientation as neither + nor -: '$strand'\n";
+	}
+      }
+    }
+  }
+
+  ############################################
+  ### THIS IS THE DEFAULT 3-CONTEXT OUTPUT ###
+  ############################################
+
+  elsif ($no_overlap) {
+    ### single-file CpG, CHG and CHH context output
+    if ($full) {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+	
+	  if ($cigar){ # only needed for SAM files
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start+$index+$pos_offset >= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start-$index+$pos_offset <= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+
+    ### strand-specific methylation output
+    else {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start+$index+$pos_offset >= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }	
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  ### Returning as soon as the methylation calls start overlapping
+	  if ($start-$index+$pos_offset <= $end_read_1) {
+	    return;
+	  }
+	
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+  }
+
+  ### this is the default paired-end procedure allowing overlaps and using every single C position
+  else {
+    ### single-file CpG, CHG and CHH context output
+    if ($full) {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+
+    ### strand-specific methylation output
+    else {
+      if ($strand eq '+') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } elsif ($strand eq '-') {
+	for my $index (0..$#methylation_calls) {
+
+	  if ($cigar){ # only needed for SAM files
+	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	    $cigar_offset += $cigar_mod;
+	    $pos_offset += $pos_mod;
+	  }
+
+	  if ($methylation_calls[$index] eq 'X') {
+	    $counting{total_meCHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'x') {
+	    $counting{total_unmethylated_CHG_count}++;
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    $counting{total_meCpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'z') {
+	    $counting{total_unmethylated_CpG_count}++;
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'H') {
+	    $counting{total_meCHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  } elsif ($methylation_calls[$index] eq 'h') {
+	    $counting{total_unmethylated_CHH_count}++;
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  }
+	  elsif ($methylation_calls[$index] eq '.') {}
+	  else{
+	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  }
+	}
+      } else {
+	die "The strand orientation as neither + nor -: '$strand'\n";
+      }
+    }
+  }
+}
+
+sub check_cigar_string {
+  my ($index,$cigar_offset,$pos_offset,$strand,$comp_cigar) = @_;
+  # print "$index\t$cigar_offset\t$pos_offset\t$strand\t";
+  my ($new_cigar_offset,$new_pos_offset) = (0,0);
+
+  if ($strand eq '+') {
+    #  print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
+
+    if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+      #  warn "position needs no adjustment\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
+      $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
+      # warn "adjusted genomic position by -1 bp (insertion)\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+      $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+      $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
+      # warn "adjusted genomic position by +1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
+
+      while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
+	if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+	  #  warn "position needs no adjustment\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
+	  $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
+	  # warn "adjusted genomic position by another -1 bp (insertion)\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+	  $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+	  $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
+	  # warn "adjusted genomic position by another +1 bp (deletion)\n";
+	}
+	else{
+	  die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+	}
+      }
+    }
+    else{
+      die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+    }
+  }
+
+  elsif ($strand eq '-') {
+    # print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
+
+    if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+     # warn "position needs no adjustment\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
+      $new_pos_offset += 1; # we need to add the length of inserted bases to the genomic position
+      # warn "adjusted genomic position by +1 bp (insertion)\n";
+    }
+
+    elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+      $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+      $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
+      # warn "adjusted genomic position by -1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
+
+      while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
+	if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
+	  # warn "Found new 'M' operation; position needs no adjustment\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
+	  $new_pos_offset += 1; # we need to subtract the length of inserted bases from the genomic position
+	  # warn "Found new 'I' operation; adjusted genomic position by another +1 bp (insertion)\n";
+	  last;
+	}
+	elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
+	  $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
+	  $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
+	  # warn "adjusted genomic position by another -1 bp (deletion)\n";
+	}
+	else{
+	  die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+	}
+      }
+    }
+    else{
+      die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
+    }
+  }
+  # print "new cigar offset: $new_cigar_offset\tnew pos offset: $new_pos_offset\n";
+  return ($new_cigar_offset,$new_pos_offset);
+}
+
+sub print_individual_C_methylation_states_single_end{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  my $methyl_CHG_count = 0;
+  my $methyl_CHH_count = 0;
+  my $methyl_CpG_count = 0;
+  my $unmethylated_CHG_count = 0;
+  my $unmethylated_CHH_count = 0;
+  my $unmethylated_CpG_count = 0;
+
+  my @len;
+  my @ops;
+  my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
+  my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
+
+  my @comp_cigar;
+
+  if ($cigar){ # parsing CIGAR string
+    @len = split (/\D+/,$cigar); # storing the length per operation
+    @ops = split (/\d+/,$cigar); # storing the operation
+    shift @ops; # remove the empty first element
+    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+    foreach my $index (0..$#len){
+      foreach (1..$len[$index]){
+	# print  "$ops[$index]";
+	push @comp_cigar, $ops[$index];
+      }
+    }
+    # warn "\nDetected CIGAR string: $cigar\n";
+    # warn "Length of methylation call: ",length $meth_call,"\n";
+    # warn "number of operations: ",scalar @ops,"\n";
+    # warn "number of length digits: ",scalar @len,"\n\n";
+    # print @comp_cigar,"\n";
+    # print "$meth_call\n\n";
+    # sleep (1);
+  }
+
+  ### adjusting the start position for all reads mapping to the reverse strand
+  if ($strand eq '-') {
+
+    @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+    # print @comp_cigar,"\n";
+
+    unless ($ignore){  ### if --ignore was specified the start position has already been corrected
+
+      if ($cigar){ ### SAM format
+	my $MD_count = 0;
+	foreach (@comp_cigar){
+	  ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	}
+	$start += $MD_count - 1;
+      }
+      else{ ### vanilla format
+	$start += length($meth_call)-1;
+      }
+    }
+  }
+
+  ### THIS IS THE CpG and Non-CpG SECTION (OPTIONAL)
+
+  ### single-file CpG and other-context output
+  if ($full and $merge_non_CpG) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.'){
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+
+  ### strand-specific methylation output
+  elsif ($merge_non_CpG) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+    	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+
+  ### THIS IS THE 3-CONTEXT (CpG, CHG and CHH) DEFAULT SECTION
+
+  elsif ($full) {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+	
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The read had a strand orientation which was neither + nor -: $strand\n";
+    }
+  }
+
+  ### strand-specific methylation output
+  else {
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+
+	if ($cigar){ # only needed for SAM files
+	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
+	  $cigar_offset += $cigar_mod;
+	  $pos_offset += $pos_mod;
+	}
+
+	if ($methylation_calls[$index] eq 'X') {
+	  $counting{total_meCHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'x') {
+	  $counting{total_unmethylated_CHG_count}++;
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'Z') {
+	  $counting{total_meCpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'z') {
+	  $counting{total_unmethylated_CpG_count}++;
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'H') {
+	  $counting{total_meCHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	} elsif ($methylation_calls[$index] eq 'h') {
+	  $counting{total_unmethylated_CHH_count}++;
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "The strand information was neither + nor -: $strand\n";
+    }
+  }
+}
+
+sub print_helpfile{
+
+ print << 'HOW_TO';
+
+
+DESCRIPTION
+
+The following is a brief description of all options to control the Bismark
+methylation extractor. The script reads in a bisulfite read alignment results file 
+produced by the Bismark bisulfite mapper and extracts the methylation information
+for individual cytosines. This information is found in the methylation call field
+which can contain the following characters:
+
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       ~~~   X   for methylated C in CHG context (was protected)     ~~~
+       ~~~   x   for not methylated C CHG (was converted)            ~~~
+       ~~~   H   for methylated C in CHH context (was protected)     ~~~
+       ~~~   h   for not methylated C in CHH context (was converted) ~~~
+       ~~~   Z   for methylated C in CpG context (was protected)     ~~~
+       ~~~   z   for not methylated C in CpG context (was converted) ~~~
+       ~~~   .   for any bases not involving cytosines               ~~~
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
+context (this distinction is actually already made in Bismark itself). As the methylation
+information for every C analysed can produce files which easily have tens or even hundreds of
+millions of lines, file sizes can become very large and more difficult to handle. The C
+methylation info additionally splits cytosine methylation calls up into one of the four possible
+strands a given bisulfite read aligned against:
+
+             OT      original top strand
+             CTOT    complementary to original top strand
+
+             OB      original bottom strand
+             CTOB    complementary to original bottom strand
+
+Thus, by default twelve individual output files are being generated per input file (unless
+--comprehensive is specified, see below). The output files can be imported into a genome
+viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
+unless the bisulfite reads were generated preserving directionality it doesn't make any
+sense to look at the data in a strand-specific manner). Strand-specific output files can
+optionally be skipped, in which case only three output files for CpG, CHG or CHH context
+will be generated. For both the strand-specific and comprehensive outputs there is also
+the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
+
+
+The output files are in the following format (tab delimited):
+
+<sequence_id>     <strand>      <chromosome>     <position>     <methylation call>
+
+
+USAGE: methylation_extractor [options] <filenames>
+
+
+ARGUMENTS:
+
+<filenames>              A space-separated list of Bismark result files in SAM format from
+                         which methylation information is extracted for every cytosine in
+                         the reads. For alignment files in the older custom Bismark output
+                         see option '--vanilla'.
+
+OPTIONS:
+
+-s/--single-end          Input file(s) are Bismark result file(s) generated from single-end
+                         read data. Specifying either --single-end or --paired-end is
+                         mandatory.
+
+-p/--paired-end          Input file(s) are Bismark result file(s) generated from paired-end
+                         read data. Specifying either --paired-end or --single-end is
+                         mandatory.
+
+--vanilla                The Bismark result input file(s) are in the old custom Bismark format
+                         (up to version 0.5.x) and not in SAM format which is the default as
+                         of Bismark version 0.6.x or higher. Default: OFF.
+
+--no_overlap             For paired-end reads it is theoretically possible that read_1 and
+                         read_2 overlap. This option avoids scoring overlapping methylation
+                         calls twice. Whilst this removes a bias towards more methylation calls
+                         towards the center of sequenced fragments it can de facto remove
+                         a good proportion of the data.
+
+--ignore <int>           Ignore the first <int> bp at the 5' end of each read when processing the
+                         methylation call string. This can remove e.g. a restriction enzyme site
+                         at the start of each read.
+
+--comprehensive          Specifying this option will merge all four possible strand-specific 
+                         methylation info into context-dependent output files. The default 
+                         contexts are:
+                          - CpG context
+                          - CHG context
+                          - CHH context
+
+--merge_non_CpG          This will produce two output files (in --comprehensive mode) or eight
+                         strand-specific output files (default) for Cs in
+                          - CpG context
+                          - non-CpG context
+
+--report                 Prints out a short methylation summary as well as the paramaters used to run
+                         this script.
+
+--version                Displays version information.
+
+-h/--help                Displays this help file and exits.
+
+
+OUTPUT:
+
+The output is in the form:
+
+<seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>
+
+* Methylated cytosines receive a '+' orientation,
+* Unmethylated cytosines receive a '-' orientation.
+
+
+This script was last modified on 31 July 2012.
+
+HOW_TO
+}