bismark: bismark comparison

comparison bismark @ 0:62c6da72dd4a draft

Uploaded

author	bgruening
date	Sat, 06 Jul 2013 09:57:36 -0400
parents
children	91f07ff056ca

comparison

equal deleted inserted replaced

--1:000000000000
+:62c6da72dd4a
+#!/usr/bin/perl --
+use strict;
+use warnings;
+use IO::Handle;
+use Cwd;
+$|++;
+use Getopt::Long;
+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+### Directory Bismark was started from; the main loop changes back to it for every input file
+my $parent_dir      = getcwd();
+my $bismark_version = 'v0.7.12';
+### The arguments exactly as they were typed, captured before the option rewrite below
+my $command_line    = join(' ', @ARGV);
+### '--solexa1.3-quals' is swapped for '--phred64-quals' before option parsing, as the '.' in the
+### option name will cause Getopt::Long to fail. The loop variable aliases the elements of @ARGV,
+### so the replacement happens in place.
+for (@ARGV){
+  $_ = '--phred64-quals' if $_ eq '--solexa1.3-quals';
+}
+### will be populated by processing the command line
+my @filenames;
+### all run-time settings as returned by process_command_line(), in the order it returns them
+my (
+  $genome_folder,    $CT_index_basename,    $GA_index_basename, $path_to_bowtie,
+  $sequence_file_format, $bowtie_options,   $directional,       $unmapped,
+  $ambiguous,        $phred64,              $solexa,            $output_dir,
+  $bowtie2,          $vanilla,              $sam_no_hd,         $skip,
+  $upto,             $temp_dir,             $non_bs_mm,         $insertion_open,
+  $insertion_extend, $deletion_open,        $deletion_extend,   $gzip,
+  $bam,              $samtools_path,        $pbat,
+) = process_command_line();
+### stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment
+my @fhs;
+### stores the chromosome sequences of the genome
+my %chromosomes;
+### counting various events
+my %counting;
+### flag, reset to 0 for every input file by the main loop
+my $seqID_contains_tabs;
+### MAIN LOOP
+### Every entry of @filenames is processed in turn. An entry containing a comma is treated as a
+### pair of mate files ('mate1,mate2') and aligned in paired-end mode, anything else is aligned in
+### single-end mode. For each input the reads are bisulfite converted in silico, the bowtie
+### instances listed in @fhs are given their input files, the alignments are started and finally
+### the methylation call procedure is launched.
+foreach my $filename (@filenames){
+  ### every input file is handled relative to the directory Bismark was started from
+  chdir $parent_dir or die "Unable to move to initial working directory $!\n";
+  ### resetting the counting hash and fhs
+  reset_counters_and_fhs($filename);
+  $seqID_contains_tabs = 0;
+  ### PAIRED-END ALIGNMENTS
+  if ($filename =~ /,/){
+    my ($C_to_T_infile_1,$G_to_A_infile_1); # to be made from mate1 file
+    $fhs[0]->{name} = 'CTread1GAread2CTgenome';
+    $fhs[1]->{name} = 'GAread1CTread2GAgenome';
+    $fhs[2]->{name} = 'GAread1CTread2CTgenome';
+    $fhs[3]->{name} = 'CTread1GAread2GAgenome';
+    warn "\nPaired-end alignments will be performed\n",'='x39,"\n\n";
+    my ($filename_1,$filename_2) = (split (/,/,$filename));
+    warn "The provided filenames for paired-end alignments are $filename_1 and $filename_2\n";
+    ### additional variables only for paired-end alignments
+    my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file
+    ### FastA format
+    if ($sequence_file_format eq 'FASTA'){
+      warn "Input files are in FastA format\n";
+      if ($directional){
+        ### directional: only C->T read 1 and G->A read 2 are used, instances 1 and 2 get no input
+        ($C_to_T_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
+        ($G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);
+        $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+        $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+        $fhs[1]->{inputfile_1} = undef;
+        $fhs[1]->{inputfile_2} = undef;
+        $fhs[2]->{inputfile_1} = undef;
+        $fhs[2]->{inputfile_2} = undef;
+        $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+        $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+      }
+      else{
+        ### non-directional: all four instances receive input
+        ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
+        ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);
+        $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+        $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+        $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+        $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+        $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+        $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+        $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+        $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+      }
+      if ($bowtie2){
+        paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+      }
+      else{
+        paired_end_align_fragments_to_bisulfite_genome_fastA ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+      }
+    }
+    ### FastQ format
+    else{
+      warn "Input files are in FastQ format\n";
+      if ($directional){
+        if ($bowtie2){
+          ($C_to_T_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+          ($G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+          $fhs[1]->{inputfile_1} = undef;
+          $fhs[1]->{inputfile_2} = undef;
+          $fhs[2]->{inputfile_1} = undef;
+          $fhs[2]->{inputfile_2} = undef;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+        }
+        elsif ($gzip){ # Bowtie 1 alignments, compressed temp files
+          ($C_to_T_infile_1) = biTransformFastQFiles_paired_end_bowtie1_gzip ($filename_1,$filename_2); # passing both reads at the same time
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1; # this file contains both read 1 and read 2 in tab delimited format
+          $fhs[0]->{inputfile_2} = undef; # no longer needed
+          $fhs[1]->{inputfile_1} = undef;
+          $fhs[1]->{inputfile_2} = undef;
+          $fhs[2]->{inputfile_1} = undef;
+          $fhs[2]->{inputfile_2} = undef;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1; # this file contains both read 1 and read 2 in tab delimited format
+          $fhs[3]->{inputfile_2} = undef; # no longer needed
+        }
+        else{ # Bowtie 1 alignments, uncompressed temp files
+          ($C_to_T_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+          ($G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+          $fhs[1]->{inputfile_1} = undef;
+          $fhs[1]->{inputfile_2} = undef;
+          $fhs[2]->{inputfile_1} = undef;
+          $fhs[2]->{inputfile_2} = undef;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+        }
+      }
+      elsif($pbat){ # PBAT-Seq
+        ### At the moment we are only performing uncompressed FastQ alignments with Bowtie1
+        ### only instances 1 and 2 (G->A read 1, C->T read 2) receive input here
+        ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+        ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+        $fhs[0]->{inputfile_1} = undef;
+        $fhs[0]->{inputfile_2} = undef;
+        $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+        $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+        $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+        $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+        $fhs[3]->{inputfile_1} = undef;
+        $fhs[3]->{inputfile_2} = undef;
+      }
+      else{
+        if ($bowtie2){
+          ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+          ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+          $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+          $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+        }
+        elsif ($gzip){ # Bowtie 1 alignments, compressed temp files
+          ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end_bowtie1_gzip ($filename_1,$filename_2); # passing both reads at the same time
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[0]->{inputfile_2} = undef; # not needed for compressed temp files
+          $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[1]->{inputfile_2} = undef;
+          $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[2]->{inputfile_2} = undef;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[3]->{inputfile_2} = undef; # not needed for compressed temp files
+        }
+        else{ # Bowtie 1 alignments, uncompressed temp files
+          ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+          ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+          $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+          $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+          $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+          $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+          $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+          $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+        }
+      }
+      if ($bowtie2){
+        paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+      }
+      else{
+        paired_end_align_fragments_to_bisulfite_genome_fastQ ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+      }
+    }
+    start_methylation_call_procedure_paired_ends($filename_1,$filename_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+  ### Else we are performing SINGLE-END ALIGNMENTS
+  else{
+    warn "\nSingle-end alignments will be performed\n",'='x39,"\n\n";
+    ### Initialising bisulfite conversion filenames
+    my ($C_to_T_infile,$G_to_A_infile);
+    ### FastA format
+    if ($sequence_file_format eq 'FASTA'){
+      warn "Input file is in FastA format\n";
+      if ($directional){
+        ### directional: only the C->T converted reads are used (instances 0 and 1)
+        ($C_to_T_infile) = biTransformFastAFiles ($filename);
+        $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
+      }
+      else{
+        ($C_to_T_infile,$G_to_A_infile) = biTransformFastAFiles ($filename);
+        $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
+        $fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
+      }
+      ### Creating 4 different bowtie filehandles and storing the first entry
+      if ($bowtie2){
+        single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 ($C_to_T_infile,$G_to_A_infile);
+      }
+      else{
+        single_end_align_fragments_to_bisulfite_genome_fastA ($C_to_T_infile,$G_to_A_infile);
+      }
+    }
+    ## FastQ format
+    else{
+      warn "Input file is in FastQ format\n";
+      if ($directional){
+        ($C_to_T_infile) = biTransformFastQFiles ($filename);
+        $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
+      }
+      elsif($pbat){
+        ($G_to_A_infile) = biTransformFastQFiles ($filename);
+        $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $G_to_A_infile; # PBAT-Seq only uses the G to A converted files
+      }
+      else{
+        ($C_to_T_infile,$G_to_A_infile) = biTransformFastQFiles ($filename);
+        $fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
+        $fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
+      }
+      ### Creating up to 4 different bowtie filehandles and storing the first entry
+      if ($bowtie2){
+        single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile,$G_to_A_infile);
+      }
+      elsif ($pbat){
+        single_end_align_fragments_to_bisulfite_genome_fastQ (undef,$G_to_A_infile);
+      }
+      else{
+        single_end_align_fragments_to_bisulfite_genome_fastQ ($C_to_T_infile,$G_to_A_infile);
+      }
+    }
+    start_methylation_call_procedure_single_ends($filename,$C_to_T_infile,$G_to_A_infile);
+  }
+}
+### Prepares all output for one single-end input file and starts the methylation caller.
+### Opens the alignment output on OUT (plain file, BAM via a pipe into Samtools, or a pipe into
+### gzip, depending on $bam), the report on REPORT and, if requested, UNMAPPED and AMBIG. Calls
+### read_genome_into_memory() if %chromosomes is still empty and generate_SAM_header() unless
+### --vanilla or --sam-no-hd output was requested, then hands over to the FastA or FastQ routine.
+###   $sequence_file - input file as given by the user (may include a path)
+###   $C_to_T_infile - name of the C->T converted temporary file (may be undef)
+###   $G_to_A_infile - name of the G->A converted temporary file (may be undef)
+### Side effects: sets the file-level $bam to 0 if it was undefined. Dies if an open() fails.
+sub start_methylation_call_procedure_single_ends {
+  my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+  ### output file names are built from the file name only, any leading path is dropped
+  my $filename;
+  if ($sequence_file =~ /\//){
+    ($filename) = $sequence_file =~ m/.*\/(.*)$/;
+  }
+  else{
+    $filename = $sequence_file;
+  }
+  ### printing all alignments to a results file
+  my $outfile = $filename;
+  if ($bowtie2){ # SAM format is the default for Bowtie 2
+    $outfile =~ s/$/_bt2_bismark.sam/;
+  }
+  elsif ($vanilla){ # vanilla custom Bismark output single-end output (like Bismark versions 0.5.X)
+    $outfile =~ s/$/_bismark.txt/;
+  }
+  else{ # SAM is the default output
+    $outfile =~ s/$/_bismark.sam/;
+  }
+  $bam = 0 unless (defined $bam);
+  if ($bam == 1){ ### Samtools is installed, writing out BAM directly
+    ### only the extension is changed: anchoring to the end of the name keeps a 'sam' that occurs
+    ### earlier in the file name (e.g. 'sample1.fastq') untouched
+    $outfile =~ s/sam$/bam/;
+    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  elsif($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
+    $outfile .= '.gz';
+    open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  else{ # uncompressed ouput, default
+    open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  warn "\n>>> Writing bisulfite mapping results to $output_dir$outfile <<<\n\n";
+  sleep(1);
+  if ($vanilla){
+    print OUT "Bismark version: $bismark_version\n";
+  }
+  ### printing alignment and methylation call summary to a report file
+  my $reportfile = $filename;
+  if ($bowtie2){
+    $reportfile =~ s/$/_bt2_bismark_SE_report.txt/;
+  }
+  else{
+    $reportfile =~ s/$/_bismark_SE_report.txt/;
+  }
+  open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
+  print REPORT "Bismark report for: $sequence_file (version: $bismark_version)\n";
+  ### optional output of reads without any alignment
+  if ($unmapped){
+    my $unmapped_file = $filename;
+    $unmapped_file =~ s/$/_unmapped_reads.txt/;
+    open (UNMAPPED,'>',"$output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\n";
+    print "Unmapped sequences will be written to $output_dir$unmapped_file\n";
+  }
+  ### optional output of reads that did not map uniquely
+  if ($ambiguous){
+    my $ambiguous_file = $filename;
+    $ambiguous_file =~ s/$/_ambiguous_reads.txt/;
+    open (AMBIG,'>',"$output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
+    print "Ambiguously mapping sequences will be written to $output_dir$ambiguous_file\n";
+  }
+  if ($directional){
+    print REPORT "Option '--directional' specified: alignments to complementary strands will be ignored (i.e. not performed!)\n";
+  }
+  print REPORT "Bowtie was run against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  ### if 2 or more files are provided we can hold the genome in memory and don't need to read it in a second time
+  unless (%chromosomes){
+    my $cwd = getcwd; # storing the path of the current working directory
+    print "Current working directory is: $cwd\n\n";
+    read_genome_into_memory($cwd);
+  }
+  unless ($vanilla or $sam_no_hd){
+    generate_SAM_header();
+  }
+  ### Input file is in FastA format
+  if ($sequence_file_format eq 'FASTA'){
+    process_single_end_fastA_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile);
+  }
+  ### Input file is in FastQ format
+  else{
+    process_single_end_fastQ_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile);
+  }
+}
+### Prepares all output for one pair of paired-end input files and starts the methylation caller.
+### Opens the alignment output on OUT (plain file, BAM via a pipe into Samtools, or a pipe into
+### gzip, depending on $bam), the report on REPORT and, if requested, UNMAPPED_1/2 and AMBIG_1/2.
+### Calls read_genome_into_memory() if %chromosomes is still empty and generate_SAM_header() unless
+### --vanilla or --sam-no-hd output was requested, then hands over to the FastA or FastQ routine.
+###   $sequence_file_1, $sequence_file_2 - mate 1 and mate 2 files as given by the user
+###   $C_to_T_infile_1, $G_to_A_infile_1 - converted temporary files of mate 1 (may be undef)
+###   $C_to_T_infile_2, $G_to_A_infile_2 - converted temporary files of mate 2 (may be undef)
+### Side effects: sets the file-level $bam to 0 if it was undefined. Dies if an open() fails.
+sub start_methylation_call_procedure_paired_ends {
+  my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+  ### output file names are built from the file names only, any leading path is dropped
+  my $filename_1;
+  if ($sequence_file_1 =~ /\//){
+    ($filename_1) = $sequence_file_1 =~ m/.*\/(.*)$/;
+  }
+  else{
+    $filename_1 = $sequence_file_1;
+  }
+  my $filename_2;
+  if ($sequence_file_2 =~ /\//){
+    ($filename_2) = $sequence_file_2 =~ m/.*\/(.*)$/;
+  }
+  else{
+    $filename_2 = $sequence_file_2;
+  }
+  ### printing all alignments to a results file (named after mate 1)
+  my $outfile = $filename_1;
+  if ($bowtie2){ # SAM format is the default Bowtie 2 output
+    $outfile =~ s/$/_bismark_bt2_pe.sam/;
+  }
+  elsif ($vanilla){ # vanilla custom Bismark paired-end output (like Bismark versions 0.5.X)
+    $outfile =~ s/$/_bismark_pe.txt/;
+  }
+  else{ # SAM format is the default Bowtie 1 output
+    $outfile =~ s/$/_bismark_pe.sam/;
+  }
+  $bam = 0 unless (defined $bam);
+  if ($bam == 1){ ### Samtools is installed, writing out BAM directly
+    ### only the extension is changed: anchoring to the end of the name keeps a 'sam' that occurs
+    ### earlier in the file name (e.g. 'sample1_R1.fastq') untouched
+    $outfile =~ s/sam$/bam/;
+    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  elsif($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
+    $outfile .= '.gz';
+    open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  else{ # uncompressed ouput, default
+    open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  warn "\n>>> Writing bisulfite mapping results to $outfile <<<\n\n";
+  sleep(1);
+  if ($vanilla){
+    print OUT "Bismark version: $bismark_version\n";
+  }
+  ### printing alignment and methylation call summary to a report file
+  my $reportfile = $filename_1;
+  if ($bowtie2){
+    $reportfile =~ s/$/_bismark_bt2_PE_report.txt/;
+  }
+  else{
+    $reportfile =~ s/$/_bismark_PE_report.txt/;
+  }
+  open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
+  print REPORT "Bismark report for: $sequence_file_1 and $sequence_file_2 (version: $bismark_version)\n";
+  print REPORT "Bowtie was run against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  ### Unmapped read output
+  if ($unmapped){
+    my $unmapped_1 = $filename_1;
+    my $unmapped_2 = $filename_2;
+    $unmapped_1 =~ s/$/_unmapped_reads_1.txt/;
+    $unmapped_2 =~ s/$/_unmapped_reads_2.txt/;
+    open (UNMAPPED_1,'>',"$output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\n";
+    open (UNMAPPED_2,'>',"$output_dir$unmapped_2") or die "Failed to write to $unmapped_2: $!\n";
+    print "Unmapped sequences will be written to $unmapped_1 and $unmapped_2\n";
+  }
+  ### Ambiguous read output
+  if ($ambiguous){
+    my $amb_1 = $filename_1;
+    my $amb_2 = $filename_2;
+    $amb_1 =~ s/$/_ambiguous_reads_1.txt/;
+    $amb_2 =~ s/$/_ambiguous_reads_2.txt/;
+    open (AMBIG_1,'>',"$output_dir$amb_1") or die "Failed to write to $amb_1: $!\n";
+    open (AMBIG_2,'>',"$output_dir$amb_2") or die "Failed to write to $amb_2: $!\n";
+    print "Ambiguously mapping sequences will be written to $amb_1 and $amb_2\n";
+  }
+  if ($directional){
+    print REPORT "Option '--directional' specified: alignments to complementary strands will be ignored (i.e. not performed)\n";
+  }
+  ### if 2 or more files are provided we might still hold the genome in memory and don't need to read it in a second time
+  unless (%chromosomes){
+    my $cwd = getcwd; # storing the path of the current working directory
+    print "Current working directory is: $cwd\n\n";
+    read_genome_into_memory($cwd);
+  }
+  unless ($vanilla or $sam_no_hd){
+    generate_SAM_header();
+  }
+  ### Input files are in FastA format
+  if ($sequence_file_format eq 'FASTA'){
+    process_fastA_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+  ### Input files are in FastQ format
+  else{
+    process_fastQ_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+  }
+}
+### Finishes a single-end run: deletes the temporary converted read file(s) from $temp_dir and
+### prints the final alignment report and the final cytosine methylation report, taken from
+### %counting, to the screen (STDERR/STDOUT) and to the REPORT filehandle.
+###   $C_to_T_infile - name of the C->T converted temporary file (unused for --pbat)
+###   $G_to_A_infile - name of the G->A converted temporary file (unused for --directional)
+### A failed deletion only produces a warning, it does not stop the run.
+sub print_final_analysis_report_single_end{
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+  ### All sequences from the original sequence file have been analysed now
+  ### deleting temporary C->T or G->A infiles
+  if ($directional){
+    my $deletion_successful =  unlink "$temp_dir$C_to_T_infile";
+    if ($deletion_successful == 1){
+      warn "\nSuccessfully deleted the temporary file $temp_dir$C_to_T_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary file $C_to_T_infile properly $!\n";
+    }
+  }
+  elsif ($pbat){
+    my $deletion_successful =  unlink "$temp_dir$G_to_A_infile";
+    if ($deletion_successful == 1){
+      warn "\nSuccessfully deleted the temporary file $temp_dir$G_to_A_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary file $G_to_A_infile properly $!\n";
+    }
+  }
+  else{
+    my $deletion_successful =  unlink "$temp_dir$C_to_T_infile","$temp_dir$G_to_A_infile";
+    if ($deletion_successful == 2){
+      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile and $temp_dir$G_to_A_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary files properly $!\n";
+    }
+  }
+  ### printing a final report for the alignment procedure
+  print REPORT "Final Alignment report\n",'='x22,"\n";
+  warn "Final Alignment report\n",'='x22,"\n";
+  #  foreach my $index (0..$#fhs){
+  #    print "$fhs[$index]->{name}\n";
+  #    print "$fhs[$index]->{seen}\talignments on the correct strand in total\n";
+  #    print "$fhs[$index]->{wrong_strand}\talignments were discarded (nonsensical alignments)\n\n";
+  #  }
+  ### printing a final report for the methylation call procedure
+  warn "Sequences analysed in total:\t$counting{sequences_count}\n";
+  print REPORT "Sequences analysed in total:\t$counting{sequences_count}\n";
+  ### mapping efficiency, guarding against an empty input file
+  my $percent_alignable_sequences;
+  if ($counting{sequences_count} == 0){
+    $percent_alignable_sequences = 0;
+  }
+  else{
+    $percent_alignable_sequences = sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});
+  }
+  warn "Number of alignments with a unique best hit from the different alignments:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequences}%\n\n";
+  print REPORT "Number of alignments with a unique best hit from the different alignments:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequences}%\n";
+  ### NOTE: the percentage of overruled low complexity alignments is no longer computed here, as
+  ### the only statement that reported it had been commented out
+  print "Sequences with no alignments under any condition:\t$counting{no_single_alignment_found}\n";
+  print "Sequences did not map uniquely:\t$counting{unsuitable_sequence_count}\n";
+  print "Sequences which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n";
+  print "Number of sequences with unique best (first) alignment came from the bowtie output:\n";
+  print join ("\n","CT/CT:\t$counting{CT_CT_count}\t((converted) top strand)","CT/GA:\t$counting{CT_GA_count}\t((converted) bottom strand)","GA/CT:\t$counting{GA_CT_count}\t(complementary to (converted) top strand)","GA/GA:\t$counting{GA_GA_count}\t(complementary to (converted) bottom strand)"),"\n\n";
+  print REPORT "Sequences with no alignments under any condition:\t$counting{no_single_alignment_found}\n";
+  print REPORT "Sequences did not map uniquely:\t$counting{unsuitable_sequence_count}\n";
+  print REPORT "Sequences which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n";
+  print REPORT "Number of sequences with unique best (first) alignment came from the bowtie output:\n";
+  print REPORT join ("\n","CT/CT:\t$counting{CT_CT_count}\t((converted) top strand)","CT/GA:\t$counting{CT_GA_count}\t((converted) bottom strand)","GA/CT:\t$counting{GA_CT_count}\t(complementary to (converted) top strand)","GA/GA:\t$counting{GA_GA_count}\t(complementary to (converted) bottom strand)"),"\n\n";
+  if ($directional){
+    print "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+    print REPORT "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+  }
+  ### detailed information about Cs analysed
+  warn "Final Cytosine Methylation Report\n",'='x33,"\n";
+  my $total_number_of_C = $counting{total_meCHH_count}+$counting{total_meCHG_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CpG_count};
+  warn "Total number of C's analysed:\t$total_number_of_C\n\n";
+  warn "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+  warn "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+  warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+  warn "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  warn "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  warn "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
+  print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
+  ### the count follows the tab directly, like on all other lines of the report
+  print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+  print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+  print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+  print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  ### printing the methylation percentage for each context in the order CpG, CHG, CHH. A percentage
+  ### can only be given if at least one cytosine was seen in that context
+  foreach my $context ('CpG','CHG','CHH'){
+    my $methylated    = $counting{"total_me${context}_count"};
+    my $unmethylated  = $counting{"total_unmethylated_${context}_count"};
+    my $end_of_line   = ($context eq 'CHH') ? "\n\n\n" : "\n"; # the last context closes the report section
+    if (($methylated+$unmethylated) > 0){
+      my $percent_methylated = sprintf("%.1f",100*$methylated/($methylated+$unmethylated));
+      warn "C methylated in $context context:\t${percent_methylated}%$end_of_line";
+      print REPORT "C methylated in $context context:\t${percent_methylated}%$end_of_line";
+    }
+    else{
+      warn "Can't determine percentage of methylated Cs in $context context if value was 0$end_of_line";
+      print REPORT "Can't determine percentage of methylated Cs in $context context if value was 0$end_of_line";
+    }
+  }
+  if ($seqID_contains_tabs){
+    warn "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
+    print REPORT "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
+  }
+}
+sub print_final_analysis_report_paired_ends{
+my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+### All sequences from the original sequence file have been analysed now, therefore deleting temporary C->T or G->A infiles
+if ($directional){
+if ($G_to_A_infile_2){
+my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_2";
+if ($deletion_successful == 2){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2\n\n";
+}
+else{
+	warn "Could not delete temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2 properly: $!\n";
+}
+}
+else{ # for paired-end FastQ infiles with Bowtie1 there is only one file to delete
+my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1";
+if ($deletion_successful == 1){
+	warn "\nSuccessfully deleted the temporary file $temp_dir$C_to_T_infile_1\n\n";
+}
+else{
+	warn "Could not delete temporary file $temp_dir$C_to_T_infile_1 properly: $!\n";
+}
+}
+}
+else{
+if ($G_to_A_infile_2 and $C_to_T_infile_2){
+my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2","$temp_dir$G_to_A_infile_2";
+if ($deletion_successful == 4){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1, $temp_dir$G_to_A_infile_1, $temp_dir$C_to_T_infile_2 and $temp_dir$G_to_A_infile_2\n\n";
+}
+else{
+	warn "Could not delete temporary files properly: $!\n";
+}
+}
+else{ # for paired-end FastQ infiles with Bowtie1 there are only two files to delete
+my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1";
+if ($deletion_successful == 2){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_1\n\n";
+}
+else{
+	warn "Could not delete temporary files properly: $!\n";
+}
+}
+}
+### printing a final report for the alignment procedure
+warn "Final Alignment report\n",'='x22,"\n";
+print REPORT "Final Alignment report\n",'='x22,"\n";
+#  foreach my $index (0..$#fhs){
+#    print "$fhs[$index]->{name}\n";
+#    print "$fhs[$index]->{seen}\talignments on the correct strand in total\n";
+#    print "$fhs[$index]->{wrong_strand}\talignments were discarded (nonsensical alignments)\n\n";
+#  }
+### printing a final report for the methylation call procedure
+warn "Sequence pairs analysed in total:\t$counting{sequences_count}\n";
+print REPORT "Sequence pairs analysed in total:\t$counting{sequences_count}\n";
+my $percent_alignable_sequence_pairs;
+if ($counting{sequences_count} == 0){
+$percent_alignable_sequence_pairs = 0;
+}
+else{
+$percent_alignable_sequence_pairs = sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});
+}
+print "Number of paired-end alignments with a unique best hit:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequence_pairs}%\n\n";
+print REPORT "Number of paired-end alignments with a unique best hit:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequence_pairs}% \n";
+print "Sequence pairs with no alignments under any condition:\t$counting{no_single_alignment_found}\n";
+print "Sequence pairs did not map uniquely:\t$counting{unsuitable_sequence_count}\n";
+print "Sequence pairs which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n";
+print "Number of sequence pairs with unique best (first) alignment came from the bowtie output:\n";
+print join ("\n","CT/GA/CT:\t$counting{CT_GA_CT_count}\t((converted) top strand)","GA/CT/CT:\t$counting{GA_CT_CT_count}\t(complementary to (converted) top strand)","GA/CT/GA:\t$counting{GA_CT_GA_count}\t(complementary to (converted) bottom strand)","CT/GA/GA:\t$counting{CT_GA_GA_count}\t((converted) bottom strand)"),"\n\n";
+print REPORT "Sequence pairs with no alignments under any condition:\t$counting{no_single_alignment_found}\n";
+print REPORT "Sequence pairs did not map uniquely:\t$counting{unsuitable_sequence_count}\n";
+print REPORT "Sequence pairs which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n";
+print REPORT "Number of sequence pairs with unique best (first) alignment came from the bowtie output:\n";
+print REPORT join ("\n","CT/GA/CT:\t$counting{CT_GA_CT_count}\t((converted) top strand)","GA/CT/CT:\t$counting{GA_CT_CT_count}\t(complementary to (converted) top strand)","GA/CT/GA:\t$counting{GA_CT_GA_count}\t(complementary to (converted) bottom strand)","CT/GA/GA:\t$counting{CT_GA_GA_count}\t((converted) bottom strand)"),"\n\n";
+### detailed information about Cs analysed
+if ($directional){
+print "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+print REPORT "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
+}
+warn "Final Cytosine Methylation Report\n",'='x33,"\n";
+print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
+my $total_number_of_C = $counting{total_meCHG_count}+ $counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
+warn "Total number of C's analysed:\t$total_number_of_C\n\n";
+warn "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+warn "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+warn "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+warn "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+warn "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
+print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
+print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
+print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
+print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+my $percent_meCHG;
+if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
+$percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
+}
+my $percent_meCHH;
+if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
+$percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
+}
+my $percent_meCpG;
+if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
+$percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
+}
+### printing methylated CpG percentage if applicable
+if ($percent_meCpG){
+warn "C methylated in CpG context:\t${percent_meCpG}%\n";
+print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
+}
+else{
+warn "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
+}
+### printing methylated C percentage in CHG context if applicable
+if ($percent_meCHG){
+warn "C methylated in CHG context:\t${percent_meCHG}%\n";
+print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
+}
+else{
+warn "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
+}
+### printing methylated C percentage in CHH context if applicable
+if ($percent_meCHH){
+warn "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+}
+else{
+warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
+}
+}
+sub process_single_end_fastA_file_for_methylation_call{
+    ### Reads a single-end FastA file record by record (one header line followed by one sequence line) and passes each read to
+    ### check_bowtie_results_single_end (Bowtie 1, default) or check_bowtie_results_single_end_bowtie2 (if $bowtie2 is set).
+    ### Parameters:
+    ###   $sequence_file - FastA file holding the reads; a name ending in .gz is decompressed on the fly via zcat
+    ###   $C_to_T_infile - not used here, handed on unchanged to print_final_analysis_report_single_end
+    ###   $G_to_A_infile - not used here, handed on unchanged to print_final_analysis_report_single_end
+    ### Side effects: increments $counting{sequences_count}, writes reads to the AMBIG/UNMAPPED filehandles (which are not opened
+    ### in this subroutine) and finally prints the single-end analysis report. Dies if the sequence file cannot be opened.
+    my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+
+    ### Opening the infile with a lexical filehandle. The list form of the pipe open and the 3-argument open make sure that
+    ### the file name is never interpreted by a shell or as an open mode (file names may contain spaces or special characters)
+    my $in;
+    if ($sequence_file =~ /\.gz$/){
+        open ($in,'-|','zcat',$sequence_file) or die "Failed to open zcat pipe to $sequence_file: $!\n";
+    }
+    else{
+        open ($in,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
+    }
+
+    my $count = 0; # number of records read so far, including the ones skipped via $skip
+    warn "\nReading in the sequence file $sequence_file\n";
+
+    while (1) {
+        my $identifier = <$in>;
+        my $sequence   = <$in>;
+        ### testing for definedness (and not for truth) so that a final line consisting of '0' without a line end is not mistaken for EOF
+        last unless (defined $identifier and defined $sequence);
+
+        $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+        ++$count;
+        if ($skip){
+            next unless ($count > $skip); # ignoring the first $skip records
+        }
+        if ($upto){
+            last if ($count > $upto);     # stopping after record number $upto
+        }
+
+        $counting{sequences_count}++;
+        if ($counting{sequences_count}%100000==0) {
+            warn "Processed $counting{sequences_count} sequences so far\n";
+        }
+
+        chomp $sequence;
+        chomp $identifier;
+        $identifier =~ s/^>//; # deletes the > at the beginning of FastA headers
+
+        my $return;
+        if ($bowtie2){
+            $return = check_bowtie_results_single_end_bowtie2 (uc($sequence),$identifier);
+        }
+        else{
+            $return = check_bowtie_results_single_end (uc($sequence),$identifier); # default Bowtie 1
+        }
+        $return ||= 0; # an empty or undefined return value counts as 0
+
+        # print the sequence to ambiguous.out if --ambiguous was specified (return value 2)
+        if ($ambiguous and $return == 2){
+            print AMBIG ">$identifier\n";
+            print AMBIG "$sequence\n";
+        }
+        # print the sequence to <unmapped.out> file if --un was specified (return value 1)
+        elsif ($unmapped and $return == 1){
+            print UNMAPPED ">$identifier\n";
+            print UNMAPPED "$sequence\n";
+        }
+    }
+
+    ### The return value of close is deliberately not checked: if we stopped early because of $upto, zcat is terminated by
+    ### SIGPIPE and closing the pipe reports this as a failure
+    close $in;
+
+    print "Processed $counting{sequences_count} sequences in total\n\n";
+    print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile);
+}
+sub process_single_end_fastQ_file_for_methylation_call{
+    ### Reads a single-end FastQ file record by record (4 lines per record: header, sequence, second header, qualities) and passes
+    ### each read to check_bowtie_results_single_end (Bowtie 1, default) or check_bowtie_results_single_end_bowtie2 (if $bowtie2 is set).
+    ### Parameters:
+    ###   $sequence_file - FastQ file holding the reads; a name ending in .gz is decompressed on the fly via zcat
+    ###   $C_to_T_infile - not used here, handed on unchanged to print_final_analysis_report_single_end
+    ###   $G_to_A_infile - not used here, handed on unchanged to print_final_analysis_report_single_end
+    ### Side effects: increments $counting{sequences_count}, writes reads to the AMBIG/UNMAPPED filehandles (which are not opened
+    ### in this subroutine) and finally prints the single-end analysis report. Dies if the sequence file cannot be opened.
+    my ($sequence_file,$C_to_T_infile,$G_to_A_infile) = @_;
+
+    ### Opening the infile with a lexical filehandle. The list form of the pipe open and the 3-argument open make sure that
+    ### the file name is never interpreted by a shell or as an open mode (file names may contain spaces or special characters)
+    my $in;
+    if ($sequence_file =~ /\.gz$/){
+        open ($in,'-|','zcat',$sequence_file) or die "Failed to open zcat pipe to $sequence_file: $!\n";
+    }
+    else{
+        open ($in,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
+    }
+
+    my $count = 0; # number of records read so far, including the ones skipped via $skip
+    warn "\nReading in the sequence file $sequence_file\n";
+
+    while (1) {
+        my $identifier    = <$in>;
+        my $sequence      = <$in>;
+        my $identifier_2  = <$in>;
+        my $quality_value = <$in>;
+        ### testing for definedness (and not for truth): a final quality line consisting of '0' without a line end is a valid
+        ### quality string and must not be mistaken for EOF
+        last unless (defined $identifier and defined $sequence and defined $identifier_2 and defined $quality_value);
+
+        $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+
+        ++$count;
+        if ($skip){
+            next unless ($count > $skip); # ignoring the first $skip records
+        }
+        if ($upto){
+            last if ($count > $upto);     # stopping after record number $upto
+        }
+
+        $counting{sequences_count}++;
+        if ($counting{sequences_count}%1000000==0) {
+            warn "Processed $counting{sequences_count} sequences so far\n";
+        }
+
+        chomp $sequence;
+        chomp $identifier;
+        chomp $quality_value;
+        $identifier =~ s/^\@//; # deletes the @ at the beginning of Illumina FastQ headers
+
+        my $return;
+        if ($bowtie2){
+            $return = check_bowtie_results_single_end_bowtie2 (uc($sequence),$identifier,$quality_value);
+        }
+        else{
+            $return = check_bowtie_results_single_end (uc($sequence),$identifier,$quality_value); # default Bowtie 1
+        }
+        $return ||= 0; # an empty or undefined return value counts as 0
+
+        # print the sequence to ambiguous.out if --ambiguous was specified (return value 2)
+        if ($ambiguous and $return == 2){
+            print AMBIG "\@$identifier\n";
+            print AMBIG "$sequence\n";
+            print AMBIG $identifier_2;      # was not chomped, still carries its own line end
+            print AMBIG "$quality_value\n";
+        }
+        # print the sequence to <unmapped.out> file if --un was specified (return value 1)
+        elsif ($unmapped and $return == 1){
+            print UNMAPPED "\@$identifier\n";
+            print UNMAPPED "$sequence\n";
+            print UNMAPPED $identifier_2;   # was not chomped, still carries its own line end
+            print UNMAPPED "$quality_value\n";
+        }
+    }
+
+    ### The return value of close is deliberately not checked: if we stopped early because of $upto, zcat is terminated by
+    ### SIGPIPE and closing the pipe reports this as a failure
+    close $in;
+
+    print "Processed $counting{sequences_count} sequences in total\n\n";
+    print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile);
+}
+sub process_fastA_files_for_paired_end_methylation_calls{
+    ### Reads two FastA files in lockstep (one header line followed by one sequence line per record) and passes each read pair to
+    ### check_bowtie_results_paired_ends (Bowtie 1, default) or check_bowtie_results_paired_ends_bowtie2 (if $bowtie2 is set).
+    ### The sequence identifier per definition needs to be the same for a sequence pair used for paired-end mapping, so only
+    ### the ID of read 1 is used for looking up the alignment.
+    ### Parameters:
+    ###   $sequence_file_1, $sequence_file_2 - FastA files with read 1 and read 2; names ending in .gz are decompressed via zcat
+    ###   $C_to_T_infile_1, $G_to_A_infile_1, $C_to_T_infile_2, $G_to_A_infile_2
+    ###                                      - not used here, handed on unchanged to print_final_analysis_report_paired_ends
+    ### Side effects: increments $counting{sequences_count}, writes read pairs to the AMBIG_1/AMBIG_2 and UNMAPPED_1/UNMAPPED_2
+    ### filehandles (which are not opened in this subroutine), closes the OUT filehandle and finally prints the paired-end
+    ### analysis report. Dies if an input file cannot be opened or OUT cannot be closed.
+    my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+    ### Opening both infiles with lexical filehandles. Whether zcat is needed is decided for each file individually, so a
+    ### gzipped file combined with an uncompressed one is read correctly as well. The list form of the pipe open and the
+    ### 3-argument open make sure that the file names are never interpreted by a shell or as an open mode
+    my @infiles;
+    foreach my $file ($sequence_file_1,$sequence_file_2){
+        my $fh;
+        if ($file =~ /\.gz$/){
+            open ($fh,'-|','zcat',$file) or die "Failed to open zcat pipe to $file $!\n";
+        }
+        else{
+            open ($fh,'<',$file) or die "Failed to read from $file: $!\n";
+        }
+        push @infiles,$fh;
+    }
+    my ($in_1,$in_2) = @infiles;
+
+    warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
+
+    ### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one
+    my $count = 0; # number of read pairs read so far, including the ones skipped via $skip
+    while (1) {
+        # reading from the first input file
+        my $identifier_1 = <$in_1>;
+        my $sequence_1   = <$in_1>;
+        # reading from the second input file
+        my $identifier_2 = <$in_2>;
+        my $sequence_2   = <$in_2>;
+        ### testing for definedness (and not for truth) so that a final line consisting of '0' without a line end is not mistaken for EOF
+        last unless (defined $identifier_1 and defined $sequence_1 and defined $identifier_2 and defined $sequence_2);
+
+        $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+        $identifier_2 = fix_IDs($identifier_2);
+
+        ++$count;
+        if ($skip){
+            next unless ($count > $skip); # ignoring the first $skip read pairs
+        }
+        if ($upto){
+            last if ($count > $upto);     # stopping after read pair number $upto
+        }
+
+        $counting{sequences_count}++;
+        if ($counting{sequences_count}%100000==0) {
+            warn "Processed $counting{sequences_count} sequences so far\n";
+        }
+
+        chomp $sequence_1;
+        chomp $sequence_2;
+
+        ### $identifier_1 and $identifier_2 keep their leading > and their line end so that they can be written out as they are;
+        ### the alignment lookup uses a bare copy of the read 1 ID instead
+        my $read_id = $identifier_1;
+        chomp $read_id;
+        $read_id =~ s/^>//; # deletes the > at the beginning of FastA headers
+
+        my $return;
+        if ($bowtie2){
+            $return = check_bowtie_results_paired_ends_bowtie2 (uc($sequence_1),uc($sequence_2),$read_id);
+        }
+        else{
+            $return = check_bowtie_results_paired_ends (uc($sequence_1),uc($sequence_2),$read_id);
+        }
+        $return ||= 0; # an empty or undefined return value counts as 0
+
+        # print the sequences to ambiguous_1 and _2 if --ambiguous was specified (return value 2)
+        if ($ambiguous and $return == 2){
+            print AMBIG_1 $identifier_1;
+            print AMBIG_1 "$sequence_1\n";
+            print AMBIG_2 $identifier_2;
+            print AMBIG_2 "$sequence_2\n";
+        }
+        # print the sequences to unmapped_1.out and unmapped_2.out if --un was specified (return value 1)
+        elsif ($unmapped and $return == 1){
+            print UNMAPPED_1 $identifier_1;
+            print UNMAPPED_1 "$sequence_1\n";
+            print UNMAPPED_2 $identifier_2;
+            print UNMAPPED_2 "$sequence_2\n";
+        }
+    }
+
+    ### The return values of these closes are deliberately not checked: if we stopped early because of $upto, zcat is
+    ### terminated by SIGPIPE and closing the pipe reports this as a failure
+    close $in_1;
+    close $in_2;
+
+    warn "Processed $counting{sequences_count} sequences in total\n\n";
+    close OUT or die $!;
+    print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+}
+sub process_fastQ_files_for_paired_end_methylation_calls{
+    ### Reads two FastQ files in lockstep (4 lines per record: header, sequence, second header, qualities) and passes each read
+    ### pair to check_bowtie_results_paired_ends (Bowtie 1, default) or check_bowtie_results_paired_ends_bowtie2 (if $bowtie2 is set).
+    ### The sequence identifier per definition needs to be the same for a sequence pair used for paired-end alignments, so only
+    ### the ID of read 1 is used for looking up the alignment.
+    ### Parameters:
+    ###   $sequence_file_1, $sequence_file_2 - FastQ files with read 1 and read 2; names ending in .gz are decompressed via zcat
+    ###   $C_to_T_infile_1, $G_to_A_infile_1, $C_to_T_infile_2, $G_to_A_infile_2
+    ###                                      - not used here, handed on unchanged to print_final_analysis_report_paired_ends
+    ### Side effects: increments $counting{sequences_count}, writes read pairs to the AMBIG_1/AMBIG_2 and UNMAPPED_1/UNMAPPED_2
+    ### filehandles (which are not opened in this subroutine), closes the OUT filehandle and finally prints the paired-end
+    ### analysis report. Dies if an input file cannot be opened or OUT cannot be closed.
+    my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
+    ### Opening both infiles with lexical filehandles. Whether zcat is needed is decided for each file individually, so a
+    ### gzipped file combined with an uncompressed one is read correctly as well. The list form of the pipe open and the
+    ### 3-argument open make sure that the file names are never interpreted by a shell or as an open mode
+    my @infiles;
+    foreach my $file ($sequence_file_1,$sequence_file_2){
+        my $fh;
+        if ($file =~ /\.gz$/){
+            open ($fh,'-|','zcat',$file) or die "Failed to open zcat pipe to $file $!\n";
+        }
+        else{
+            open ($fh,'<',$file) or die "Failed to read from $file: $!\n";
+        }
+        push @infiles,$fh;
+    }
+    my ($in_1,$in_2) = @infiles;
+
+    my $count = 0; # number of read pairs read so far, including the ones skipped via $skip
+    warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
+
+    ### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one
+    while (1) {
+        # reading from the first input file
+        my $identifier_1    = <$in_1>;
+        my $sequence_1      = <$in_1>;
+        my $ident_1         = <$in_1>; # second header line, only needed for writing the read out again
+        my $quality_value_1 = <$in_1>;
+        # reading from the second input file
+        my $identifier_2    = <$in_2>;
+        my $sequence_2      = <$in_2>;
+        my $ident_2         = <$in_2>; # second header line, only needed for writing the read out again
+        my $quality_value_2 = <$in_2>;
+        ### testing for definedness (and not for truth): a final quality line consisting of '0' without a line end is a valid
+        ### quality string and must not be mistaken for EOF
+        last unless (defined $identifier_1 and defined $sequence_1 and defined $quality_value_1 and defined $identifier_2 and defined $sequence_2 and defined $quality_value_2);
+
+        $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+        $identifier_2 = fix_IDs($identifier_2);
+
+        ++$count;
+        if ($skip){
+            next unless ($count > $skip); # ignoring the first $skip read pairs
+        }
+        if ($upto){
+            last if ($count > $upto);     # stopping after read pair number $upto
+        }
+
+        $counting{sequences_count}++;
+        if ($counting{sequences_count}%100000==0) {
+            warn "Processed $counting{sequences_count} sequences so far\n";
+        }
+
+        chomp $sequence_1;
+        chomp $sequence_2;
+        chomp $quality_value_1;
+        chomp $quality_value_2;
+
+        ### $identifier_1 and $identifier_2 keep their leading @ and their line end so that they can be written out as they are;
+        ### the alignment lookup uses a bare copy of the read 1 ID instead
+        my $read_id = $identifier_1;
+        chomp $read_id;
+        $read_id =~ s/^\@//; # deletes the @ at the beginning of the FastQ ID
+
+        my $return;
+        if ($bowtie2){
+            $return = check_bowtie_results_paired_ends_bowtie2 (uc($sequence_1),uc($sequence_2),$read_id,$quality_value_1,$quality_value_2);
+        }
+        else{
+            $return = check_bowtie_results_paired_ends (uc($sequence_1),uc($sequence_2),$read_id,$quality_value_1,$quality_value_2);
+        }
+        $return ||= 0; # an empty or undefined return value counts as 0
+
+        # print the sequences to ambiguous_1 and _2 if --ambiguous was specified (return value 2)
+        if ($ambiguous and $return == 2){
+            # seq_1
+            print AMBIG_1 $identifier_1;
+            print AMBIG_1 "$sequence_1\n";
+            print AMBIG_1 $ident_1;
+            print AMBIG_1 "$quality_value_1\n";
+            # seq_2
+            print AMBIG_2 $identifier_2;
+            print AMBIG_2 "$sequence_2\n";
+            print AMBIG_2 $ident_2;
+            print AMBIG_2 "$quality_value_2\n";
+        }
+        # print the sequences to unmapped_1.out and unmapped_2.out if --un was specified (return value 1)
+        elsif ($unmapped and $return == 1){
+            # seq_1
+            print UNMAPPED_1 $identifier_1;
+            print UNMAPPED_1 "$sequence_1\n";
+            print UNMAPPED_1 $ident_1;
+            print UNMAPPED_1 "$quality_value_1\n";
+            # seq_2
+            print UNMAPPED_2 $identifier_2;
+            print UNMAPPED_2 "$sequence_2\n";
+            print UNMAPPED_2 $ident_2;
+            print UNMAPPED_2 "$quality_value_2\n";
+        }
+    }
+
+    ### The return values of these closes are deliberately not checked: if we stopped early because of $upto, zcat is
+    ### terminated by SIGPIPE and closing the pipe reports this as a failure
+    close $in_1;
+    close $in_2;
+
+    warn "Processed $counting{sequences_count} sequences in total\n\n";
+    close OUT or die $!;
+    print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+}
+sub check_bowtie_results_single_end{
+my ($sequence,$identifier,$quality_value) = @_;
+unless ($quality_value){ # FastA sequences get assigned a quality value of Phred 40 throughout
+$quality_value = 'I'x(length$sequence);
+}
+my %mismatches = ();
+### reading from the bowtie output files to see if this sequence aligned to a bisulfite converted genome
+foreach my $index (0..$#fhs){
+### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});
+### if the sequence we are currently looking at produced an alignment we are doing various things with it
+if ($fhs[$index]->{last_seq_id} eq $identifier) {
+###############################################################
+### STEP I Now processing the alignment stored in last_line ###
+###############################################################
+my $valid_alignment_found_1 = decide_whether_single_end_alignment_is_valid($index,$identifier);
+### sequences can fail at this point if there was only 1 seq in the wrong orientation, or if there were 2 seqs, both in the wrong orientation
+### we only continue to extract useful information about this alignment if 1 was returned
+if ($valid_alignment_found_1 == 1){
+	### Bowtie outputs which made it this far are in the correct orientation, so we can continue to analyse the alignment itself
+	### need to extract the chromosome number from the bowtie output (which is either XY_cf (complete forward) or XY_cr (complete reverse)
+	my ($id,$strand,$mapped_chromosome,$position,$bowtie_sequence,$mismatch_info) = (split (/\t/,$fhs[$index]->{last_line},-1))[0,1,2,3,4,7];
+	unless($mismatch_info){
+	  $mismatch_info = '';
+	}
+	chomp $mismatch_info;
+	my $chromosome;
+	if ($mapped_chromosome =~ s/_(CT|GA)_converted$//){
+	  $chromosome = $mapped_chromosome;
+	}
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome\n";
+	}
+	### Now extracting the number of mismatches to the converted genome
+	my $number_of_mismatches;
+	if ($mismatch_info eq ''){
+	  $number_of_mismatches = 0;
+	}
+	elsif ($mismatch_info =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info);
+	  $number_of_mismatches = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field:\t>>> $mismatch_info <<<\n";
+	}
+	### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	my $alignment_location = join (":",$chromosome,$position);
+	### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	### location (the genomic sequence extraction and methylation would not be affected by this, only the thing which would change is the index
+	### number for the found alignment)
+	unless (exists $mismatches{$number_of_mismatches}->{$alignment_location}){
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{seq_id}=$id;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{bowtie_sequence}=$bowtie_sequence;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{index}=$index;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome;
+	  $mismatches{$number_of_mismatches}->{$alignment_location}->{position}=$position;
+	}
+	$number_of_mismatches = undef;
+	##################################################################################################################################################
+	### STEP II Now reading in the next line from the bowtie filehandle. The next alignment can either be a second alignment of the same sequence or a
+	### a new sequence. In either case we will store the next line in @fhs ->{last_line}. In case the alignment is already the next entry, a 0 will
+	### be returned as $valid_alignment_found and it will then be processed in the next round only.
+	##################################################################################################################################################
+	my $newline = $fhs[$index]->{fh}-> getline();
+	if ($newline){
+	  my ($seq_id) = split (/\t/,$newline);
+	  $fhs[$index]->{last_seq_id} = $seq_id;
+	  $fhs[$index]->{last_line} = $newline;
+	}
+	else {
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of bowtie output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line} = undef;
+	  next;
+	}
+	my $valid_alignment_found_2 = decide_whether_single_end_alignment_is_valid($index,$identifier);
+	### we only continue to extract useful information about this second alignment if 1 was returned
+	if ($valid_alignment_found_2 == 1){
+	  ### If the second Bowtie output made it this far it is in the correct orientation, so we can continue to analyse the alignment itself
+	  ### need to extract the chromosome number from the bowtie output (which is either XY_cf (complete forward) or XY_cr (complete reverse)
+	  my ($id,$strand,$mapped_chromosome,$position,$bowtie_sequence,$mismatch_info) = (split (/\t/,$fhs[$index]->{last_line},-1))[0,1,2,3,4,7];
+	  unless($mismatch_info){
+	    $mismatch_info = '';
+	  }
+	  chomp $mismatch_info;
+	  my $chromosome;
+	  if ($mapped_chromosome =~ s/_(CT|GA)_converted$//){
+	    $chromosome = $mapped_chromosome;
+	  }
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome\n";
+	  }
+	  ### Now extracting the number of mismatches to the converted genome
+	  my $number_of_mismatches;
+	  if ($mismatch_info eq ''){
+	    $number_of_mismatches = 0;
+	  }
+	  elsif ($mismatch_info =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info);
+	    $number_of_mismatches = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  ### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	  ### extracting the chromosome number from the bowtie output (see above)
+	  my $alignment_location = join (":",$chromosome,$position);
+	  ### In the special case that two differently converted sequences align against differently converted genomes, but to the same position
+	  ### with the same number of mismatches (or perfect matches), the chromosome, position and number of mismatches are the same. In this
+	  ### case we are not writing the same entry out a second time.
+	  unless (exists $mismatches{$number_of_mismatches}->{$alignment_location}){
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{seq_id}=$id;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{bowtie_sequence}=$bowtie_sequence;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{index}=$index;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome;
+	    $mismatches{$number_of_mismatches}->{$alignment_location}->{position}=$position;
+	  }
+	  ####################################################################################################################################
+	  #### STEP III Now reading in one more line which has to be the next alignment to be analysed. Adding it to @fhs ->{last_line}    ###
+	  ####################################################################################################################################
+	  $newline = $fhs[$index]->{fh}-> getline();
+	  if ($newline){
+	    my ($seq_id) = split (/\t/,$newline);
+	    die "The same seq ID occurred more than twice in a row\n" if ($seq_id eq $identifier);
+	    $fhs[$index]->{last_seq_id} = $seq_id;
+	    $fhs[$index]->{last_line} = $newline;
+	    next;
+	  }
+	  else {
+	    # assigning undef to last_seq_id and last_line and jumping to the next index (end of bowtie output)
+	    $fhs[$index]->{last_seq_id} = undef;
+	    $fhs[$index]->{last_line} = undef;
+	    next;
+	  }
+	  ### still within the 2nd sequence in correct orientation found
+	}
+	### still withing the 1st sequence in correct orientation found
+}
+### still within the if (last_seq_id eq identifier) condition
+}
+### still within foreach index loop
+}
+### if there was not a single alignment found for a certain sequence we will continue with the next sequence in the sequence file
+unless(%mismatches){
+$counting{no_single_alignment_found}++;
+if ($unmapped){
+return 1; ### We will print this sequence out as unmapped sequence if --un unmapped.out has been specified
+}
+else{
+return;
+}
+}
+#######################################################################################################################################################
+#######################################################################################################################################################
+### We are now looking if there is a unique best alignment for a certain sequence. This means we are sorting in ascending order and look at the     ###
+### sequence with the lowest amount of mismatches. If there is only one single best position we are going to store the alignment information in the ###
+### meth_call variables, if there are multiple hits with the same amount of (lowest) mismatches we are discarding the sequence altogether           ###
+#######################################################################################################################################################
+#######################################################################################################################################################
+### Going to use the variable $sequence_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+my $sequence_fails = 0;
+### Declaring an empty hash reference which will store all information we need for the methylation call
+my $methylation_call_params; # hash reference!
+### sorting in ascending order
+foreach my $mismatch_number (sort {$a<=>$b} keys %mismatches){
+### if there is only 1 entry in the hash with the lowest number of mismatches we accept it as the best alignment
+if (scalar keys %{$mismatches{$mismatch_number}} == 1){
+for my $unique_best_alignment (keys %{$mismatches{$mismatch_number}}){
+	$methylation_call_params->{$identifier}->{bowtie_sequence} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence};
+	$methylation_call_params->{$identifier}->{chromosome} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{chromosome};
+	$methylation_call_params->{$identifier}->{position} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{position};
+	$methylation_call_params->{$identifier}->{index} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{index};
+	$methylation_call_params->{$identifier}->{number_of_mismatches} = $mismatch_number;
+}
+}
+elsif (scalar keys %{$mismatches{$mismatch_number}} == 3){
+### If there are 3 sequences with the same number of lowest mismatches we can discriminate 2 cases: (i) all 3 alignments are unique best hits and
+### come from different alignments processes (== indices) or (ii) one sequence alignment (== index) will give a unique best alignment, whereas a
+### second one will produce 2 (or potentially many) alignments for the same sequence but in a different conversion state or against a different genome
+### version (or both). This becomes especially relevant for highly converted sequences in which all Cs have been converted to Ts in the bisulfite
+### reaction. E.g.
+### CAGTCACGCGCGCGCG will become
+### TAGTTATGTGTGTGTG in the CT transformed version, which will ideally still give the correct alignment in the CT->CT alignment condition.
+### If the same read will then become G->A transformed as well however, the resulting sequence will look differently and potentially behave
+### differently in a GA->GA alignment and this depends on the methylation state of the original sequence!:
+### G->A conversion:
+### highly methylated: CAATCACACACACACA
+### highly converted : TAATTATATATATATA <== this sequence has a reduced complexity (only 2 bases left and not 3), and it is more likely to produce
+### an alignment with a low complexity genomic region than the one above. This would normally lead to the entire sequence being kicked out as the
+### there will be 3 alignments with the same number of lowest mismatches!! This in turn means that highly methylated and thereby not converted
+### sequences are more likely to pass the alignment step, thereby creating a bias for methylated reads compared to their non-methylated counterparts.
+### We do not want any bias, whatsoever. Therefore if we have 1 sequence producing a unique best alignment and the second and third conditions
+### producing alignments only after performing an additional (theoretical) conversion we want to keep the best alignment with the lowest number of
+### additional transliterations performed. Thus we want to have a look at the level of complexity of the sequences producing the alignment.
+### In the above example the number of transliterations required to transform the actual sequence
+### to the C->T version would be TAGTTATGTGTGTGTG -> TAGTTATGTGTGTGTG = 0; (assuming this gives the correct alignment)
+### in the G->A case it would be TAGTTATGTGTGTGTG -> TAATTATATATATATA = 6; (assuming this gives multiple wrong alignments)
+### if the sequence giving a unique best alignment required a lower number of transliterations than the second best sequence yielding alignments
+### while requiring a much higher number of transliterations, we are going to accept the unique best alignment with the lowest number of performed
+### transliterations. As a threshold which does scale we will start with the number of transliterations of the lowest best match x 2 must still be
+### smaller than the number of transliterations of the second best sequence. Everything else will be flagged with $sequence_fails = 1 and discarded.
+my @three_candidate_seqs;
+foreach my $composite_location (keys (%{$mismatches{$mismatch_number}}) ){
+	my $transliterations_performed;
+	if ($mismatches{$mismatch_number}->{$composite_location}->{index} == 0 or $mismatches{$mismatch_number}->{$composite_location}->{index} == 1){
+	  $transliterations_performed = determine_number_of_transliterations_performed($sequence,'CT');
+	}
+	elsif ($mismatches{$mismatch_number}->{$composite_location}->{index} == 2 or $mismatches{$mismatch_number}->{$composite_location}->{index} == 3){
+	  $transliterations_performed = determine_number_of_transliterations_performed($sequence,'GA');
+	}
+	else{
+	  die "unexpected index number range $!\n";
+	}
+	push @three_candidate_seqs,{
+				    index =>$mismatches{$mismatch_number}->{$composite_location}->{index},
+				    bowtie_sequence => $mismatches{$mismatch_number}->{$composite_location}->{bowtie_sequence},
+				    mismatch_number => $mismatch_number,
+				    chromosome => $mismatches{$mismatch_number}->{$composite_location}->{chromosome},
+				    position => $mismatches{$mismatch_number}->{$composite_location}->{position},
+				    seq_id => $mismatches{$mismatch_number}->{$composite_location}->{seq_id},
+				    transliterations_performed => $transliterations_performed,
+				   };
+}
+### sorting in ascending order for the lowest number of transliterations performed
+@three_candidate_seqs = sort {$a->{transliterations_performed} <=> $b->{transliterations_performed}} @three_candidate_seqs;
+my $first_array_element = $three_candidate_seqs[0]->{transliterations_performed};
+my $second_array_element = $three_candidate_seqs[1]->{transliterations_performed};
+my $third_array_element = $three_candidate_seqs[2]->{transliterations_performed};
+# print "$first_array_element\t$second_array_element\t$third_array_element\n";
+if (($first_array_element*2) < $second_array_element){
+	$counting{low_complexity_alignments_overruled_count}++;
+	### taking the index with the unique best hit and over ruling low complexity alignments with 2 hits
+	$methylation_call_params->{$identifier}->{bowtie_sequence} = $three_candidate_seqs[0]->{bowtie_sequence};
+	$methylation_call_params->{$identifier}->{chromosome} = $three_candidate_seqs[0]->{chromosome};
+	$methylation_call_params->{$identifier}->{position} = $three_candidate_seqs[0]->{position};
+	$methylation_call_params->{$identifier}->{index} = $three_candidate_seqs[0]->{index};
+	$methylation_call_params->{$identifier}->{number_of_mismatches} = $mismatch_number;
+	# print "Overruled low complexity alignments! Using $first_array_element and disregarding $second_array_element and $third_array_element\n";
+}
+else{
+	$sequence_fails = 1;
+}
+}
+else{
+$sequence_fails = 1;
+}
+### after processing the alignment with the lowest number of mismatches we exit
+last;
+}
+### skipping the sequence completely if there were multiple alignments with the same amount of lowest mismatches found at different positions
+if ($sequence_fails == 1){
+$counting{unsuitable_sequence_count}++;
+if ($ambiguous){
+return 2; # => exits to next sequence, and prints it out to multiple_alignments.out if --ambiguous has been specified
+}
+if ($unmapped){
+return 1; # => exits to next sequence, and prints it out to unmapped.out if --un has been specified
+}
+else{
+return 0; # => exits to next sequence (default)
+}
+}
+### --DIRECTIONAL
+### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+if ($directional){
+if ( ($methylation_call_params->{$identifier}->{index} == 2) or ($methylation_call_params->{$identifier}->{index} == 3) ){
+#    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+$counting{alignments_rejected_count}++;
+return 0;
+}
+}
+### If the sequence has not been rejected so far it will have a unique best alignment
+$counting{unique_best_alignment_count}++;
+if ($pbat){
+extract_corresponding_genomic_sequence_single_end_pbat($identifier,$methylation_call_params);
+}
+else{
+extract_corresponding_genomic_sequence_single_end($identifier,$methylation_call_params);
+}
+### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+### otherwise we are set to perform the actual methylation call
+$methylation_call_params->{$identifier}->{methylation_call} = methylation_call($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
+print_bisulfite_mapping_result_single_end($identifier,$sequence,$methylation_call_params,$quality_value);
+return 0; ## otherwise 1 will be returned by default, which would print the sequence to unmapped.out
+}
+sub check_bowtie_results_single_end_bowtie2{ # BOWTIE 2 | SINGLE-END: gathers the alignments of one read from every Bowtie 2 instance in @fhs and, if there is a unique best alignment, performs the methylation call and prints the result
+my ($sequence,$identifier,$quality_value) = @_; # read sequence, read ID and quality string (may be empty for FastA). Returns 0 (nothing more to print), 1 (print read to the unmapped output) or 2 (print read to the ambiguous output)
+unless ($quality_value){ # FastA sequences get assigned a quality value of Phred 40 throughout
+$quality_value = 'I'x(length$sequence);
+}
+# as of version Bowtie 2 2.0.0 beta7, when input reads are unpaired, Bowtie 2 no longer removes the trailing /1 or /2 from the read name.
+# $identifier =~ s/\/[1234567890]+$//; # some sequencers don't just have /1 or /2 at the end of read IDs
+# print "sequence $sequence\nid $identifier\nquality: '$quality_value'\n";
+my $alignment_ambiguous = 0; # set to 1 as soon as one Bowtie 2 instance reports an alignment score (AS) equal to its second best score (XS) for this read
+my %alignments = (); # candidate alignments of this read from all instances, keyed by "chromosome:position"
+### reading from the Bowtie 2 output filehandles. Every entry of @fhs keeps its current, not yet consumed output line in last_line and the read ID of that line in last_seq_id
+foreach my $index (0..$#fhs){
+#  print "Index: $index\n";
+#   print "$fhs[$index]->{last_line}\n";
+#   print "$fhs[$index]->{last_seq_id}\n";
+# sleep (1);
+### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});
+### if the sequence we are currently looking at produced an alignment we are doing various things with it
+# print "last seq id: $fhs[$index]->{last_seq_id} and identifier: $identifier\n";
+if ($fhs[$index]->{last_seq_id} eq $identifier) {
+#  SAM format specifications for Bowtie 2
+#  (1) Name of read that aligned
+#  (2) Sum of all applicable flags. Flags relevant to Bowtie are:
+#        1 The read is one of a pair
+#        2 The alignment is one end of a proper paired-end alignment
+#        4 The read has no reported alignments
+#        8 The read is one of a pair and has no reported alignments
+#       16 The alignment is to the reverse reference strand
+#       32 The other mate in the paired-end alignment is aligned to the reverse reference strand
+#       64 The read is mate 1 in a pair
+#      128 The read is mate 2 in a pair
+#      256 The read has multiple mapping states
+#  (3) Name of reference sequence where alignment occurs (unmapped reads have a *)
+#  (4) 1-based offset into the forward reference strand where leftmost character of the alignment occurs (0 for unmapped reads)
+#  (5) Mapping quality (255 means MAPQ is not available)
+#  (6) CIGAR string representation of alignment (* if unavailable)
+#  (7) Name of reference sequence where mate's alignment occurs. Set to = if the mate's reference sequence is the same as this alignment's, or * if there is no mate.
+#  (8) 1-based offset into the forward reference strand where leftmost character of the mate's alignment occurs. Offset is 0 if there is no mate.
+#  (9) Inferred fragment size. Size is negative if the mate's alignment occurs upstream of this alignment. Size is 0 if there is no mate.
+# (10) Read sequence (reverse-complemented if aligned to the reverse strand)
+# (11) ASCII-encoded read qualities (reverse-complemented if the read aligned to the reverse strand). The encoded quality values are on the Phred quality scale and the encoding is ASCII-offset by 33 (ASCII char !), similarly to a FASTQ file.
+# (12) Optional fields. Fields are tab-separated. bowtie2 outputs zero or more of these optional fields for each alignment, depending on the type of the alignment:
+# AS:i:<N> Alignment score. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if SAM record is for an aligned read.
+# XS:i:<N> Alignment score for second-best alignment. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if the SAM record is for an aligned read and more than one alignment was found for the read.
+# YS:i:<N> Alignment score for opposite mate in the paired-end alignment. Only present if the SAM record is for a read that aligned as part of a paired-end alignment.
+# XN:i:<N> The number of ambiguous bases in the reference covering this alignment. Only present if SAM record is for an aligned read.
+# XM:i:<N> The number of mismatches in the alignment. Only present if SAM record is for an aligned read.
+# XO:i:<N> The number of gap opens, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+# XG:i:<N> The number of gap extensions, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+# NM:i:<N> The edit distance; that is, the minimal number of one-nucleotide edits (substitutions, insertions and deletions) needed to transform the read string into the reference string. Only present if SAM record is for an aligned read.
+# YF:Z:<N> String indicating reason why the read was filtered out. See also: Filtering. Only appears for reads that were filtered out.
+# MD:Z:<S> A string representation of the mismatched reference bases in the alignment. See SAM format specification for details. Only present if SAM record is for an aligned read.
+my ($id,$flag,$mapped_chromosome,$position,$mapping_quality,$cigar,$bowtie_sequence,$qual) = (split (/\t/,$fhs[$index]->{last_line}))[0,1,2,3,4,5,9,10]; # 0-based SAM columns: 0-5 are fields (1)-(6) above, 9 and 10 are fields (10) and (11)
+### If a sequence has no reported alignments there will be a single output line with a bit-wise flag value of 4. We can store the next alignment and move on to the next Bowtie 2 instance
+if ($flag == 4){
+	## reading in the next alignment, which must be the next sequence
+	my $newline = $fhs[$index]->{fh}-> getline();
+	if ($newline){
+	  chomp $newline;
+	  my ($seq_id) = split (/\t/,$newline);
+	  $fhs[$index]->{last_seq_id} = $seq_id;
+	  $fhs[$index]->{last_line} = $newline;
+	  if ($seq_id eq $identifier){
+	    die "Sequence with ID $identifier did not produce any alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+	  }
+	  next; # next instance
+	}
+	else{
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line} = undef;
+	  next;
+	}
+}
+# if there are one or more proper alignments we can extract the chromosome number
+my $chromosome;
+if ($mapped_chromosome =~ s/_(CT|GA)_converted$//){ # the substitution removes the _CT_converted or _GA_converted suffix in place
+	$chromosome = $mapped_chromosome;
+}
+else{
+	die "Chromosome number extraction failed for $mapped_chromosome\n";
+}
+### We will use the optional field to determine the best alignment. Later on we extract the number of mismatches and/or indels from the CIGAR string
+my ($alignment_score,$second_best,$MD_tag);
+my @fields = split (/\t/,$fhs[$index]->{last_line});
+foreach (11..$#fields){ # the optional fields start at (0-based) column 11
+	if ($fields[$_] =~ /AS:i:(.*)/){
+	  $alignment_score = $1;
+	}
+	elsif ($fields[$_] =~ /XS:i:(.*)/){
+	  $second_best = $1;
+	}
+	elsif ($fields[$_] =~ /MD:Z:(.*)/){
+	  $MD_tag = $1;
+	}
+}
+#      warn "First  best alignment_score is: '$alignment_score'\n";
+#     warn "MD tag is: '$MD_tag'\n";
+die "Failed to extract alignment score ($alignment_score) and MD tag ($MD_tag)!\n" unless (defined $alignment_score and defined $MD_tag);
+if (defined $second_best){
+	#	warn "second best alignment_score is: '$second_best'\n\n";
+	# If the first alignment score is the same as the alignment score of the second best hit we are going to boot this sequence altogether
+	if ($alignment_score == $second_best){
+	  $alignment_ambiguous = 1;
+	  ## need to read and discard all additional ambiguous reads until we reach the next sequence
+	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+	    my $newline = $fhs[$index]->{fh}-> getline();
+	    if ($newline){
+	      chomp $newline;
+	      my ($seq_id) = split (/\t/,$newline);
+	      $fhs[$index]->{last_seq_id} = $seq_id;
+	      $fhs[$index]->{last_line} = $newline;
+	    }
+	    else{
+	      # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	      $fhs[$index]->{last_seq_id} = undef;
+	      $fhs[$index]->{last_line} = undef;
+	      last; # break free in case we have reached the end of the alignment output
+	    }
+	  }
+	  #  warn "Index: $index\tThe current Seq-ID is $identifier, skipped all ambiguous sequences until the next ID which is: $fhs[$index]->{last_seq_id}\n";
+	}
+	else{ # the next best alignment has a lower alignment score than the current read, so we can safely store the current alignment
+	  my $alignment_location = join (":",$chromosome,$position);
+	  ### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	  ### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+	  ### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+	  ### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 1, i.e. OT and OB
+	  unless (exists $alignments{$alignment_location}){
+	    $alignments{$alignment_location}->{seq_id} = $id;
+	    $alignments{$alignment_location}->{alignment_score} = $alignment_score;
+	    $alignments{$alignment_location}->{bowtie_sequence} = $bowtie_sequence;
+	    $alignments{$alignment_location}->{index} = $index;
+	    $alignments{$alignment_location}->{chromosome} = $chromosome;
+	    $alignments{$alignment_location}->{position} = $position;
+	    $alignments{$alignment_location}->{CIGAR} = $cigar;
+	    $alignments{$alignment_location}->{MD_tag} = $MD_tag;
+	  }
+	  ### now reading and discarding all (inferior) alignments of this sequencing read until we hit the next sequence
+	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+	    my $newline = $fhs[$index]->{fh}-> getline();
+	    if ($newline){
+	      chomp $newline;
+	      my ($seq_id) = split (/\t/,$newline);
+	      $fhs[$index]->{last_seq_id} = $seq_id;
+	      $fhs[$index]->{last_line} = $newline;
+	    }
+	    else{
+	      # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	      $fhs[$index]->{last_seq_id} = undef;
+	      $fhs[$index]->{last_line} = undef;
+	      last; # break free in case we have reached the end of the alignment output
+	    }
+	  }
+	  #  warn "Index: $index\tThe current Seq-ID is $identifier, skipped all ambiguous sequences until the next ID which is: $fhs[$index]->{last_seq_id}\n";
+	}
+}
+else{ # there is no second best hit, so we can just store this one and read in the next sequence
+	my $alignment_location = join (":",$chromosome,$position);
+	### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+	### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+	### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 1, i.e. OT and OB
+	unless (exists $alignments{$alignment_location}){
+	  $alignments{$alignment_location}->{seq_id} = $id;
+	  $alignments{$alignment_location}->{alignment_score} = $alignment_score;
+	  $alignments{$alignment_location}->{bowtie_sequence} = $bowtie_sequence;
+	  $alignments{$alignment_location}->{index} = $index;
+	  $alignments{$alignment_location}->{chromosome} = $chromosome;
+	  $alignments{$alignment_location}->{position} = $position;
+	  $alignments{$alignment_location}->{MD_tag} = $MD_tag;
+	  $alignments{$alignment_location}->{CIGAR} = $cigar;
+	}
+	my $newline = $fhs[$index]->{fh}-> getline();
+	if ($newline){
+	  chomp $newline;
+	  my ($seq_id) = split (/\t/,$newline);
+	  $fhs[$index]->{last_seq_id} = $seq_id;
+	  $fhs[$index]->{last_line} = $newline;
+	  if ($seq_id eq $identifier){
+	    die "Sequence with ID $identifier did not have a second best alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+	  }
+	}
+	else{
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line} = undef;
+	}
+}
+}
+}
+### if at least one Bowtie 2 instance reported an ambiguous alignment for this read (AS equal to XS) we can return right away. The return value signals whether the read is to be printed out for --ambiguous or --unmapped.
+if ($alignment_ambiguous == 1){
+$counting{unsuitable_sequence_count}++;
+### report that the sequence has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+# my $ambiguous_read_output = join("\t",$identifier,'256','*','0','0','*','*','0','0',$sequence,$quality_value);
+# print "$ambiguous_read_output\n";
+if ($ambiguous){
+return 2; # => exits to next sequence, and prints it out to _ambiguous_reads.txt if '--ambiguous' was specified
+}
+elsif ($unmapped){
+return 1; # => exits to next sequence, and prints it out to _unmapped_reads.txt if '--unmapped' but not '--ambiguous' was specified
+}
+else{
+return 0;
+}
+}
+### if there was no alignment found for a certain sequence at all we continue with the next sequence in the sequence file
+unless(%alignments){
+$counting{no_single_alignment_found}++;
+# my $unmapped_read_output = join("\t",$identifier,'4','*','0','0','*','*','0','0',$sequence,$quality_value);
+# print  "$unmapped_read_output\n";
+if ($unmapped){
+return 1; # => exits to next sequence, and prints it out to _unmapped_reads.txt if '--unmapped' was specified
+}
+else{
+return 0; # default
+}
+}
+#######################################################################################################################################################
+### If the sequence was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
+### single best position we are going to store the alignment information in the $methylation_call_params variable. If there are multiple hits with the
+### same (highest) alignment score we are discarding the sequence altogether.
+### For end-to-end alignments the maximum alignment score can be 0, each mismatch can receive penalties up to 6, and each gap receives penalties for
+### opening (5) and extending (3 per bp) the gap.
+#######################################################################################################################################################
+my $methylation_call_params; # hash reference which will store all information we need for the methylation call
+my $sequence_fails = 0; # Going to use $sequence_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+### print contents of %alignments for debugging
+#   if (scalar keys %alignments > 1){
+#     print "\n******\n";
+#     foreach my $alignment_location (sort {$a cmp $b} keys %alignments){
+#       print "Loc:  $alignment_location\n";
+#       print "ID:   $alignments{$alignment_location}->{seq_id}\n";
+#       print "AS:   $alignments{$alignment_location}->{alignment_score}\n";
+#       print "Seq:  $alignments{$alignment_location}->{bowtie_sequence}\n";
+#       print "Index $alignments{$alignment_location}->{index}\n";
+#       print "Chr:  $alignments{$alignment_location}->{chromosome}\n";
+#       print "pos:  $alignments{$alignment_location}->{position}\n";
+#       print "MD:   $alignments{$alignment_location}->{MD_tag}\n\n";
+#     }
+#     print "\n******\n";
+#   }
+### if there is only 1 entry in the hash we accept it as the best alignment
+if (scalar keys %alignments == 1){
+for my $unique_best_alignment (keys %alignments){
+$methylation_call_params->{$identifier}->{bowtie_sequence} = $alignments{$unique_best_alignment}->{bowtie_sequence};
+$methylation_call_params->{$identifier}->{chromosome}      = $alignments{$unique_best_alignment}->{chromosome};
+$methylation_call_params->{$identifier}->{position}        = $alignments{$unique_best_alignment}->{position};
+$methylation_call_params->{$identifier}->{index}           = $alignments{$unique_best_alignment}->{index};
+$methylation_call_params->{$identifier}->{alignment_score} = $alignments{$unique_best_alignment}->{alignment_score};
+$methylation_call_params->{$identifier}->{MD_tag}          = $alignments{$unique_best_alignment}->{MD_tag};
+$methylation_call_params->{$identifier}->{CIGAR}           = $alignments{$unique_best_alignment}->{CIGAR};
+}
+}
+### otherwise we are going to find out if there is a best match among the multiple alignments, or whether there are 2 or more equally good alignments (in which case
+### we boot the sequence altogether)
+elsif (scalar keys %alignments >= 2  and scalar keys %alignments <= 4){
+my $best_alignment_score;
+my $best_alignment_location;
+foreach my $alignment_location (sort {$alignments{$b}->{alignment_score} <=> $alignments{$a}->{alignment_score}} keys %alignments){ # highest alignment score first; only the first two entries are ever inspected
+# print "$alignments{$alignment_location}->{alignment_score}\n";
+unless (defined $best_alignment_score){
+	$best_alignment_score = $alignments{$alignment_location}->{alignment_score};
+	$best_alignment_location = $alignment_location;
+	# print "setting best alignment score: $best_alignment_score\n";
+}
+else{
+	### if the second best alignment has the same alignment score as the first one, the sequence will get booted
+	if ($alignments{$alignment_location}->{alignment_score} == $best_alignment_score){
+	  # warn "Same alignment score, the sequence will get booted!\n";
+	  $sequence_fails = 1;
+	  last; # exiting after the second alignment since we know that the sequence has ambiguous alignments
+	}
+	### else we are going to store the best alignment for further processing
+	else{
+	  $methylation_call_params->{$identifier}->{bowtie_sequence} = $alignments{$best_alignment_location}->{bowtie_sequence};
+	  $methylation_call_params->{$identifier}->{chromosome}      = $alignments{$best_alignment_location}->{chromosome};
+	  $methylation_call_params->{$identifier}->{position}        = $alignments{$best_alignment_location}->{position};
+	  $methylation_call_params->{$identifier}->{index}           = $alignments{$best_alignment_location}->{index};
+	  $methylation_call_params->{$identifier}->{alignment_score} = $alignments{$best_alignment_location}->{alignment_score};
+	  $methylation_call_params->{$identifier}->{MD_tag}          = $alignments{$best_alignment_location}->{MD_tag};
+	  $methylation_call_params->{$identifier}->{CIGAR}           = $alignments{$best_alignment_location}->{CIGAR};
+	  last; # exiting after processing the second alignment since the sequence produced a unique best alignment
+	}
+}
+}
+}
+else{
+die "There are too many potential hits for this sequence (1-4 expected, but found: ",scalar keys %alignments,")\n";;
+}
+### skipping the sequence completely if there were multiple alignments with the same best alignment score at different positions
+if ($sequence_fails == 1){
+$counting{unsuitable_sequence_count}++;
+### report that the sequence has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+# my $ambiguous_read_output = join("\t",$identifier,'256','*','0','0','*','*','0','0',$sequence,$quality_value);
+# print OUT "$ambiguous_read_output\n";
+if ($ambiguous){
+return 2; # => exits to next sequence, and prints it out (in FastQ format) to _ambiguous_reads.txt if '--ambiguous' was specified
+}
+elsif ($unmapped){
+return 1; # => exits to next sequence, and prints it out (in FastQ format) to _unmapped_reads.txt if '--unmapped' but not '--ambiguous' was specified
+}
+else{
+return 0; # => exits to next sequence (default)
+}
+}
+### --DIRECTIONAL
+### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+if ($directional){
+if ( ($methylation_call_params->{$identifier}->{index} == 2) or ($methylation_call_params->{$identifier}->{index} == 3) ){
+# warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+$counting{alignments_rejected_count}++;
+return 0;
+}
+}
+### If the sequence has not been rejected so far it has a unique best alignment
+$counting{unique_best_alignment_count}++;
+### Now we need to extract a genomic sequence that exactly corresponds to the reported alignment. This potentially means that we need to deal with insertions or deletions as well
+extract_corresponding_genomic_sequence_single_end_bowtie2 ($identifier,$methylation_call_params); # NOTE(review): presumably this fills in unmodified_genomic_sequence and read_conversion, which are read below but never set in this sub - confirm in that sub
+### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+### otherwise we are set to perform the actual methylation call
+$methylation_call_params->{$identifier}->{methylation_call} = methylation_call($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
+print_bisulfite_mapping_result_single_end_bowtie2 ($identifier,$sequence,$methylation_call_params,$quality_value);
+return 0; ## if a sequence got this far we do not want to print it to unmapped or ambiguous.out
+}
+sub determine_number_of_transliterations_performed{
+  ### Returns how many bases of $sequence are affected by the given in-silico read conversion:
+  ### the number of Cs for a 'CT' conversion, or the number of Gs for a 'GA' conversion.
+  ### Dies if $read_conversion is neither 'CT' nor 'GA'.
+  my ($sequence,$read_conversion) = @_;
+  ### tr with an empty replacement list only counts the matching characters, which is all we need here
+  if ($read_conversion eq 'CT'){
+    return scalar ($sequence =~ tr/C//);
+  }
+  if ($read_conversion eq 'GA'){
+    return scalar ($sequence =~ tr/G//);
+  }
+  die "Read conversion mode of the read was not specified $!\n";
+}
+sub decide_whether_single_end_alignment_is_valid{
+  ### Bowtie 1 | single-end: decides whether the Bowtie instance $index currently holds an alignment of the read $identifier in a sensible orientation.
+  ### Returns 1 if $fhs[$index]->{last_line} (possibly after reading one further line) is such an alignment, and 0 otherwise.
+  ### Whenever lines had to be read ahead, last_seq_id and last_line are updated to the most recently read line, or set to undef at the end of the Bowtie output.
+  my ($index,$identifier) = @_;
+  # extracting from Bowtie 1 format
+  my ($id,$strand) = (split (/\t/,$fhs[$index]->{last_line}))[0,1];
+  ### the sequence stored in @fhs as last_line is already the next sequence to be analysed -> analyse next round
+  unless (($id eq $fhs[$index]->{last_seq_id}) and ($id eq $identifier)){
+    return 0;
+  }
+  ### checking the orientation of the alignment. We need to discriminate between 8 different conditions, however only 4 of them are theoretically
+  ### sensible alignments
+  my $orientation = ensure_sensical_alignment_orientation_single_end ($index,$strand);
+  return 1 if ($orientation == 1); ### 1st possibility for a sequence to pass
+  die "The orientation of the alignment must be either correct or incorrect\n" unless ($orientation == 0);
+  ### the first alignment was in the wrong orientation, so we need to look at the next line
+  my $newline = $fhs[$index]->{fh}->getline();
+  unless ($newline){
+    # end of bowtie output; nothing is processed as the alignment stored in last_line was in the wrong orientation
+    @{$fhs[$index]}{qw(last_seq_id last_line)} = (undef,undef);
+    return 0;
+  }
+  ($id,$strand) = (split (/\t/,$newline))[0,1];
+  ### the line we just read in already belongs to the next sequence to be analysed -> store it in @fhs and process it in the next round
+  unless ($id eq $identifier){
+    @{$fhs[$index]}{qw(last_seq_id last_line)} = ($id,$newline);
+    return 0;
+  }
+  ### second alignment of the same sequence: checking the orientation again
+  $orientation = ensure_sensical_alignment_orientation_single_end ($index,$strand);
+  if ($orientation == 1){
+    @{$fhs[$index]}{qw(last_seq_id last_line)} = ($id,$newline);
+    return 1; ### 2nd possibility for a sequence to pass
+  }
+  die "The orientation of the alignment must be either correct or incorrect\n" unless ($orientation == 0);
+  ### wrong orientation again: read in yet another line and store it in @fhs for the next round
+  $newline = $fhs[$index]->{fh}->getline();
+  unless ($newline){
+    # end of bowtie output
+    @{$fhs[$index]}{qw(last_seq_id last_line)} = (undef,undef);
+    return 0;
+  }
+  my ($seq_id) = split (/\t/,$newline);
+  ### a third line with the same seq ID must not happen
+  die "Same seq ID 3 or more times in a row!(should be 2 max) $!" if ($seq_id eq $identifier);
+  @{$fhs[$index]}{qw(last_seq_id last_line)} = ($seq_id,$newline);
+  return 0; # not processing anything this round as both alignments of this sequence were in the wrong orientation
+}
+#########################
+### BOWTIE 1 | PAIRED-END
+#########################
+sub check_bowtie_results_paired_ends{
+my ($sequence_1,$sequence_2,$identifier,$quality_value_1,$quality_value_2) = @_;
+### quality values are not given for FastA files, so they are initialised with a Phred quality of 40
+unless ($quality_value_1){
+$quality_value_1 = 'I'x(length$sequence_1);
+}
+unless ($quality_value_2){
+$quality_value_2 = 'I'x(length$sequence_2);
+}
+#  warn "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
+#  sleep (1);
+my %mismatches = ();
+### reading from the bowtie output files to see if this sequence pair aligned to a bisulfite converted genome
+### for paired end reads we are reporting alignments to the OT strand first (index 0), then the OB strand (index 3!!), similar to the single end way.
+### alignments to the complementary strands are reported afterwards (CTOT got index 1, and CTOB got index 2).
+### This is needed so that alignments which either contain no single C or G or reads which contain only protected Cs are reported to the original strands (OT and OB)
+### Before the complementary strands. Remember that it does not make any difference for the methylation calls, but it will matter if alignment to the complementary
+### strands are not being reported by specifying --directional
+foreach my $index (0,3,1,2){
+### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+next unless ($fhs[$index]->{last_line_1} and $fhs[$index]->{last_line_2} and defined $fhs[$index]->{last_seq_id});
+### if the sequence pair we are currently looking at produced an alignment we are doing various things with it
+if ($fhs[$index]->{last_seq_id} eq $identifier) {
+# print "$identifier\n$fhs[$index]->{last_seq_id}\n\n";
+##################################################################################
+### STEP I Processing the entry which is stored in last_line_1 and last_line_2 ###
+##################################################################################
+my $valid_alignment_found = decide_whether_paired_end_alignment_is_valid($index,$identifier);
+### sequences can fail at this point if there was only 1 alignment in the wrong orientation, or if there were 2 alignments both in the wrong
+### orientation. We only continue to extract useful information about this alignment if 1 was returned
+if ($valid_alignment_found == 1){
+	### Bowtie outputs which made it this far are in the correct orientation, so we can continue to analyse the alignment itself.
+	### we store the useful information in %mismatches
+	my ($id_1,$strand_1,$mapped_chromosome_1,$position_1,$bowtie_sequence_1,$mismatch_info_1) = (split (/\t/,$fhs[$index]->{last_line_1},-1))[0,1,2,3,4,7];
+	my ($id_2,$strand_2,$mapped_chromosome_2,$position_2,$bowtie_sequence_2,$mismatch_info_2) = (split (/\t/,$fhs[$index]->{last_line_2},-1))[0,1,2,3,4,7];
+	chomp $mismatch_info_1;
+	chomp $mismatch_info_2;
+	### need to extract the chromosome number from the bowtie output (which is either XY_CT_converted or XY_GA_converted
+	my ($chromosome_1,$chromosome_2);
+	if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	  $chromosome_1 = $mapped_chromosome_1;
+	}
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+	}
+	if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	  $chromosome_2 = $mapped_chromosome_2;
+	}
+	else{
+	  die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+	}
+	### Now extracting the number of mismatches to the converted genome
+	my $number_of_mismatches_1;
+	my $number_of_mismatches_2;
+	if ($mismatch_info_1 eq ''){
+	  $number_of_mismatches_1 = 0;
+	}
+	elsif ($mismatch_info_1 =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info_1);
+	  $number_of_mismatches_1 = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field\n";
+	}
+	if ($mismatch_info_2 eq ''){
+	  $number_of_mismatches_2 = 0;
+	}
+	elsif ($mismatch_info_2 =~ /^\d/){
+	  my @mismatches = split (/,/,$mismatch_info_2);
+	  $number_of_mismatches_2 = scalar @mismatches;
+	}
+	else{
+	  die "Something weird is going on with the mismatch field\n";
+	}
+	### To decide whether a sequence pair has a unique best alignment we will look at the lowest sum of mismatches from both alignments
+	my $sum_of_mismatches = $number_of_mismatches_1+$number_of_mismatches_2;
+	### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	die "Position 1 is higher than position 2" if ($position_1 > $position_2);
+	die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+	my $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	### location (the genomic sequence extraction and methylation would not be affected by this, the only thing which would change is the index
+	### number for the found alignment)
+	unless (exists $mismatches{$sum_of_mismatches}->{$alignment_location}){
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{seq_id}=$id_1; # either is fine
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_1}=$bowtie_sequence_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_2}=$bowtie_sequence_2;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{index}=$index;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome_1; # either is fine
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_1}=$position_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_2}=$position_2;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_1} = $number_of_mismatches_1;
+	  $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_2} = $number_of_mismatches_2;
+	}
+	###################################################################################################################################################
+	### STEP II Now reading in the next 2 lines from the bowtie filehandle. If there are 2 next lines in the alignments filehandle it can either    ###
+	### be a second alignment of the same sequence pair or a new sequence pair. In any case we will just add it to last_line_1 and last_line_2.     ###
+	### If it is the alignment of the next sequence pair, 0 will be returned as $valid_alignment_found, so it will not be processed any further in  ###
+	### this round                                                                                                                                  ###
+	###################################################################################################################################################
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+	if ($newline_1 and $newline_2){
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	  if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+	    $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  }
+	  elsif ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+	    $fhs[$index]->{last_seq_id} = $seq_id_2;
+	  }
+	  else{
+	    die "Either read 1 or read 2 needs to end on '/1'\n";
+	  }
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+	}
+	else {
+	  # assigning undef to last_seq_id and both last_lines and jumping to the next index (end of bowtie output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	  next; # jumping to the next index
+	}
+	### Now processing the entry we just stored in last_line_1 and last_line_2
+	$valid_alignment_found = decide_whether_paired_end_alignment_is_valid($index,$identifier);
+	### only processing the alignment further if 1 was returned. 0 will be returned either if the alignment is already the next sequence pair to
+	### be analysed or if it was a second alignment of the current sequence pair but in the wrong orientation
+	if ($valid_alignment_found == 1){
+	  ### we store the useful information in %mismatches
+	  ($id_1,$strand_1,$mapped_chromosome_1,$position_1,$bowtie_sequence_1,$mismatch_info_1) = (split (/\t/,$fhs[$index]->{last_line_1}))[0,1,2,3,4,7];
+	  ($id_2,$strand_2,$mapped_chromosome_2,$position_2,$bowtie_sequence_2,$mismatch_info_2) = (split (/\t/,$fhs[$index]->{last_line_2}))[0,1,2,3,4,7];
+	  chomp $mismatch_info_1;
+	  chomp $mismatch_info_2;
+	  ### need to extract the chromosome number from the bowtie output (which is either _CT_converted or _GA_converted)
+	  if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	    $chromosome_1 = $mapped_chromosome_1;
+	  }
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+	  }
+	  if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	    $chromosome_2 = $mapped_chromosome_2;
+	  }
+	  else{
+	    die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+	  }
+	  $number_of_mismatches_1='';
+	  $number_of_mismatches_2='';
+	  ### Now extracting the number of mismatches to the converted genome
+	  if ($mismatch_info_1 eq ''){
+	    $number_of_mismatches_1 = 0;
+	  }
+	  elsif ($mismatch_info_1 =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info_1);
+	    $number_of_mismatches_1 = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  if ($mismatch_info_2 eq ''){
+	    $number_of_mismatches_2 = 0;
+	  }
+	  elsif ($mismatch_info_2 =~ /^\d/){
+	    my @mismatches = split (/,/,$mismatch_info_2);
+	    $number_of_mismatches_2 = scalar @mismatches;
+	  }
+	  else{
+	    die "Something weird is going on with the mismatch field\n";
+	  }
+	  ### To decide whether a sequence pair has a unique best alignment we will look at the lowest sum of mismatches from both alignments
+	  $sum_of_mismatches = $number_of_mismatches_1+$number_of_mismatches_2;
+	  ### creating a composite location variable from $chromosome and $position and storing the alignment information in a temporary hash table
+	  die "position 1 is greater than position 2" if ($position_1 > $position_2);
+	  die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+	  $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	  ### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	  ### strand) were methylated and therefore protected. It is not needed to overwrite the same positional entry with a second entry for the same
+	  ### location (the genomic sequence extraction and methylation would not be affected by this, the only thing which would change is the index
+	  ### number for the found alignment)
+	  unless (exists $mismatches{$sum_of_mismatches}->{$alignment_location}){
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{seq_id}=$id_1; # either is fine
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_1}=$bowtie_sequence_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{bowtie_sequence_2}=$bowtie_sequence_2;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{index}=$index;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{chromosome}=$chromosome_1; # either is fine
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_1}=$position_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{start_seq_2}=$position_2;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_1} = $number_of_mismatches_1;
+	    $mismatches{$sum_of_mismatches}->{$alignment_location}->{number_of_mismatches_2} = $number_of_mismatches_2;
+	  }
+	  ###############################################################################################################################################
+	  ### STEP III Now reading in two more lines. These have to be the next entry and we will just assign them to last_line_1 and last_line_2     ###
+	  ###############################################################################################################################################
+	  $newline_1 = $fhs[$index]->{fh}-> getline();
+	  $newline_2 = $fhs[$index]->{fh}-> getline();
+	  if ($newline_1 and $newline_2){
+	    my ($seq_id_1) = split (/\t/,$newline_1);
+	    my ($seq_id_2) = split (/\t/,$newline_2);
+	    if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	    }
+	    if ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+	      $fhs[$index]->{last_seq_id} = $seq_id_2;
+	    }
+	    $fhs[$index]->{last_line_1} = $newline_1;
+	    $fhs[$index]->{last_line_2} = $newline_2;
+	  }
+	  else {
+	    # assigning undef to last_seq_id and both last_lines and jumping to the next index (end of bowtie output)
+	    $fhs[$index]->{last_seq_id} = undef;
+	    $fhs[$index]->{last_line_1} = undef;
+	    $fhs[$index]->{last_line_2} = undef;
+	    next; # jumping to the next index
+	  }
+	  ### within the 2nd sequence pair alignment in correct orientation found
+	}
+	### within the 1st sequence pair alignment in correct orientation found
+}
+### still within the (last_seq_id eq identifier) condition
+}
+### still within foreach index loop
+}
+### if there was no single alignment found for a certain sequence we will continue with the next sequence in the sequence file
+unless(%mismatches){
+$counting{no_single_alignment_found}++;
+return 1; ### We will print this sequence out as unmapped sequence if --un unmapped.out has been specified
+}
+### Going to use the variable $sequence_pair_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+my $sequence_pair_fails = 0;
+### Declaring an empty hash reference which will store all information we need for the methylation call
+my $methylation_call_params; # hash reference!
+### We are now looking if there is a unique best alignment for a certain sequence. This means we are sorting in ascending order and look at the
+### sequence with the lowest amount of mismatches. If there is only one single best position we are going to store the alignment information in the
+### meth_call variables, if there are multiple hits with the same amount of (lowest) mismatches we are discarding the sequence altogether
+foreach my $mismatch_number (sort {$a<=>$b} keys %mismatches){
+#dev print "Number of mismatches: $mismatch_number\t$identifier\t$sequence_1\t$sequence_2\n";
+foreach my $entry (keys (%{$mismatches{$mismatch_number}}) ){
+#dev print "$mismatch_number\t$entry\t$mismatches{$mismatch_number}->{$entry}->{index}\n";
+# print join("\t",$mismatch_number,$mismatches{$mismatch_number}->{$entry}->{seq_id},$sequence,$mismatches{$mismatch_number}->{$entry}->{bowtie_sequence},$mismatches{$mismatch_number}->{$entry}->{chromosome},$mismatches{$mismatch_number}->{$entry}->{position},$mismatches{$mismatch_number}->{$entry}->{index}),"\n";
+}
+if (scalar keys %{$mismatches{$mismatch_number}} == 1){
+#  print "Unique best alignment for sequence pair $sequence_1\t$sequence_1\n";
+for my $unique_best_alignment (keys %{$mismatches{$mismatch_number}}){
+	$methylation_call_params->{$identifier}->{seq_id} = $identifier;
+	$methylation_call_params->{$identifier}->{bowtie_sequence_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_1};
+	$methylation_call_params->{$identifier}->{bowtie_sequence_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_2};
+	$methylation_call_params->{$identifier}->{chromosome} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{chromosome};
+	$methylation_call_params->{$identifier}->{start_seq_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_1};
+	$methylation_call_params->{$identifier}->{start_seq_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_2};
+	$methylation_call_params->{$identifier}->{alignment_end} = ($mismatches{$mismatch_number}->{$unique_best_alignment}->{start_seq_2}+length($mismatches{$mismatch_number}->{$unique_best_alignment}->{bowtie_sequence_2}));
+	$methylation_call_params->{$identifier}->{index} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{index};
+	$methylation_call_params->{$identifier}->{number_of_mismatches_1} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{number_of_mismatches_1};
+	$methylation_call_params->{$identifier}->{number_of_mismatches_2} = $mismatches{$mismatch_number}->{$unique_best_alignment}->{number_of_mismatches_2};
+}
+}
+else{
+$sequence_pair_fails = 1;
+}
+### after processing the alignment with the lowest number of mismatches we exit
+last;
+}
+### skipping the sequence completely if there were multiple alignments with the same amount of lowest mismatches found at different positions
+if ($sequence_pair_fails == 1){
+$counting{unsuitable_sequence_count}++;
+if ($ambiguous){
+return 2; # => exits to next sequence pair, and prints both seqs out to multiple_alignments_1 and -2 if --ambiguous has been specified
+}
+if ($unmapped){
+return 1; # => exits to next sequence pair, and prints both seqs out to unmapped_1 and _2  if --un has been specified
+}
+else{
+return 0; # => exits to next sequence (default)
+}
+}
+### --DIRECTIONAL
+### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+if ($directional){
+if ( ($methylation_call_params->{$identifier}->{index} == 1) or ($methylation_call_params->{$identifier}->{index} == 2) ){
+#    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+$counting{alignments_rejected_count}++;
+return 0;
+}
+}
+### If the sequence has not been rejected so far it does have a unique best alignment
+$counting{unique_best_alignment_count}++;
+extract_corresponding_genomic_sequence_paired_ends($identifier,$methylation_call_params);
+### sanity check: the genomic sequences we extracted must have the same length as the observed sequences +2; only then do we perform the methylation call
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_1}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_2}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+### otherwise we are set to perform the actual methylation call
+$methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
+$methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
+print_bisulfite_mapping_results_paired_ends($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2);
+return 0; ## otherwise 1 will be returned by default, which would print the sequence pair to unmapped_1 and _2
+}
+#########################
+### BOWTIE 2 | PAIRED-END
+#########################
+sub check_bowtie_results_paired_ends_bowtie2{
+my ($sequence_1,$sequence_2,$identifier,$quality_value_1,$quality_value_2) = @_;
+### quality values are not given for FastA files, so they are initialised with a Phred quality of 40
+unless ($quality_value_1){
+$quality_value_1 = 'I'x(length$sequence_1);
+}
+unless ($quality_value_2){
+$quality_value_2 = 'I'x(length$sequence_2);
+}
+# print "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
+my %alignments;
+my $alignment_ambiguous = 0;
+### reading from the Bowtie 2 output filehandles
+### for paired end reads we are reporting alignments to the OT strand first (index 0), then the OB strand (index 3!!), similar to the single end way.
+### alignments to the complementary strands are reported afterwards (CTOT got index 1, and CTOB got index 2).
+### This is needed so that alignments which either contain no single C or G or reads which contain only protected Cs are reported to the original strands (OT and OB)
+### Before the complementary strands. Remember that it does not make any difference for the methylation calls, but it will matter if alignments to the complementary
+### strands are not being reported when '--directional' is specified
+foreach my $index (0,3,1,2){
+### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
+next unless ($fhs[$index]->{last_line_1} and $fhs[$index]->{last_line_2} and defined $fhs[$index]->{last_seq_id});
+### if the sequence pair we are currently looking at produced an alignment we are doing various things with it
+if ($fhs[$index]->{last_seq_id} eq $identifier) {
+my ($id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1) = (split (/\t/,$fhs[$index]->{last_line_1}))[0,1,2,3,4,5,9,10];
+my ($id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2) = (split (/\t/,$fhs[$index]->{last_line_2}))[0,1,2,3,4,5,9,10];
+#  print "Index: $index\t$fhs[$index]->{last_line_1}\n";
+#  print "Index: $index\t$fhs[$index]->{last_line_2}\n";
+#  print join ("\t",$id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1),"\n";
+#  print join ("\t",$id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2),"\n";
+$id_1 =~ s/\/1$//;
+$id_2 =~ s/\/2$//;
+#  SAM format specifications for Bowtie 2
+#  (1) Name of read that aligned
+#  (2) Sum of all applicable flags. Flags relevant to Bowtie are:
+#        1 The read is one of a pair
+#        2 The alignment is one end of a proper paired-end alignment
+#        4 The read has no reported alignments
+#        8 The read is one of a pair and has no reported alignments
+#       16 The alignment is to the reverse reference strand
+#       32 The other mate in the paired-end alignment is aligned to the reverse reference strand
+#       64 The read is mate 1 in a pair
+#      128 The read is mate 2 in a pair
+#      256 The read has multiple mapping states
+#  (3) Name of reference sequence where alignment occurs (unmapped reads have a *)
+#  (4) 1-based offset into the forward reference strand where leftmost character of the alignment occurs (0 for unmapped reads)
+#  (5) Mapping quality (255 means MAPQ is not available)
+#  (6) CIGAR string representation of alignment (* if unavailable)
+#  (7) Name of reference sequence where mate's alignment occurs. Set to = if the mate's reference sequence is the same as this alignment's, or * if there is no mate.
+#  (8) 1-based offset into the forward reference strand where leftmost character of the mate's alignment occurs. Offset is 0 if there is no mate.
+#  (9) Inferred fragment size. Size is negative if the mate's alignment occurs upstream of this alignment. Size is 0 if there is no mate.
+# (10) Read sequence (reverse-complemented if aligned to the reverse strand)
+# (11) ASCII-encoded read qualities (reverse-complemented if the read aligned to the reverse strand). The encoded quality values are on the Phred quality scale and the encoding is ASCII-offset by 33 (ASCII char !), similarly to a FASTQ file.
+# (12) Optional fields. Fields are tab-separated. bowtie2 outputs zero or more of these optional fields for each alignment, depending on the type of the alignment:
+# AS:i:<N> Alignment score. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if SAM record is for an aligned read.
+# XS:i:<N> Alignment score for second-best alignment. Can be negative. Can be greater than 0 in --local mode (but not in --end-to-end mode). Only present if the SAM record is for an aligned read and more than one alignment was found for the read.
+# YS:i:<N> Alignment score for opposite mate in the paired-end alignment. Only present if the SAM record is for a read that aligned as part of a paired-end alignment.
+# XN:i:<N> The number of ambiguous bases in the reference covering this alignment. Only present if SAM record is for an aligned read.
+# XM:i:<N> The number of mismatches in the alignment. Only present if SAM record is for an aligned read.
+# XO:i:<N> The number of gap opens, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+# XG:i:<N> The number of gap extensions, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.
+# NM:i:<N> The edit distance; that is, the minimal number of one-nucleotide edits (substitutions, insertions and deletions) needed to transform the read string into the reference string. Only present if SAM record is for an aligned read.
+# YF:Z:<N> String indicating reason why the read was filtered out. See also: Filtering. Only appears for reads that were filtered out.
+# MD:Z:<S> A string representation of the mismatched reference bases in the alignment. See SAM format specification for details. Only present if SAM record is for an aligned read.
+### If a sequence has no reported alignments there will be a single output line per sequence with a bit-wise flag value of 77 for read 1 (1+4+8+64), or 141 for read 2 (1+4+8+128).
+### We can store the next alignment and move on to the next Bowtie 2 instance
+if ($flag_1 == 77 and $flag_2 == 141){
+	## reading in the next alignment, which must be the next sequence
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+	if ($newline_1 and $newline_2){
+	  chomp $newline_1;
+	  chomp $newline_2;
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	  $seq_id_1 =~ s/\/1$//;
+	  $seq_id_2 =~ s/\/2$//;
+	  $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+	  #  print "current sequence ($identifier) did not map, reading in next sequence\n";
+	  #  print "$index\t$fhs[$index]->{last_seq_id}\n";
+	  #  print "$index\t$fhs[$index]->{last_line_1}\n";
+	  #  print "$index\t$fhs[$index]->{last_line_2}\n";
+	  next; # next instance
+	}
+	else{
+	  # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	  next;
+	}
+}
+### If there are one or more proper alignments we can extract the chromosome number
+my ($chromosome_1,$chromosome_2);
+if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
+	$chromosome_1 = $mapped_chromosome_1;
+}
+else{
+	die "Chromosome number extraction failed for $mapped_chromosome_1\n";
+}
+if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
+	$chromosome_2 = $mapped_chromosome_2;
+}
+else{
+	die "Chromosome number extraction failed for $mapped_chromosome_2\n";
+}
+die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);
+### We will use the optional fields to determine the best alignments. Later on we extract the number of mismatches and/or indels from the CIGAR string
+my ($alignment_score_1,$alignment_score_2,$second_best_1,$second_best_2,$MD_tag_1,$MD_tag_2);
+my @fields_1 = split (/\t/,$fhs[$index]->{last_line_1});
+my @fields_2 = split (/\t/,$fhs[$index]->{last_line_2});
+foreach (11..$#fields_1){
+	if ($fields_1[$_] =~ /AS:i:(.*)/){
+	  $alignment_score_1 = $1;
+	}
+	elsif ($fields_1[$_] =~ /XS:i:(.*)/){
+	  $second_best_1 = $1;
+	}
+	elsif ($fields_1[$_] =~ /MD:Z:(.*)/){
+	  $MD_tag_1 = $1;
+	}
+}
+foreach (11..$#fields_2){
+	if ($fields_2[$_] =~ /AS:i:(.*)/){
+	  $alignment_score_2 = $1;
+	}
+	elsif ($fields_2[$_] =~ /XS:i:(.*)/){
+	  $second_best_2 = $1;
+	}
+	elsif ($fields_2[$_] =~ /MD:Z:(.*)/){
+	  $MD_tag_2 = $1;
+	}
+}
+die "Failed to extract alignment score 1 ($alignment_score_1) and MD tag ($MD_tag_1)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_1 and defined $MD_tag_1);
+die "Failed to extract alignment score 2 ($alignment_score_2) and MD tag ($MD_tag_2)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_2 and defined $MD_tag_2);
+# warn "First read 1 alignment score is: '$alignment_score_1'\n";
+# warn "First read 2 alignment score is: '$alignment_score_2'\n";
+# warn "MD tag 1 is: '$MD_tag_1'\n";
+# warn "MD tag 2 is: '$MD_tag_2'\n";
+### To decide whether a sequence pair has a unique best alignment we will look at the highest sum of alignment scores from both alignments
+my $sum_of_alignment_scores_1 = $alignment_score_1 + $alignment_score_2 ;
+# print "sum of alignment scores: $sum_of_alignment_scores_1\n\n";
+if (defined $second_best_1 and defined $second_best_2){
+	my $sum_of_alignment_scores_second_best = $second_best_1 + $second_best_2;
+	# warn "Second best alignment_score_1 is: '$second_best_1'\n";
+	# warn "Second best alignment_score_2 is: '$second_best_2'\n";
+	# warn "Second best alignment sum of alignment scores is: '$sum_of_alignment_scores_second_best'\n";
+	# If the first alignment score for the first read pair is the same as the alignment score of the second best hit we are going to boot this sequence pair altogether
+	if ($sum_of_alignment_scores_1 == $sum_of_alignment_scores_second_best){
+	  $alignment_ambiguous = 1;
+	  # print "This read will be chucked (AS==XS detected)!\n";
+	  ## need to read and discard all additional ambiguous reads until we reach the next sequence
+	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+	    my $newline_1 = $fhs[$index]->{fh}-> getline();
+	    my $newline_2 = $fhs[$index]->{fh}-> getline();
+	    if ($newline_1 and $newline_2){
+	      chomp $newline_1;
+	      chomp $newline_2;
+	      my ($seq_id_1) = split (/\t/,$newline_1);
+	      my ($seq_id_2) = split (/\t/,$newline_2);
+	      $seq_id_1 =~ s/\/1$//;
+	      $seq_id_2 =~ s/\/2$//;
+	      # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	      $fhs[$index]->{last_line_1} = $newline_1;
+	      $fhs[$index]->{last_line_2} = $newline_2;
+		}
+	    else{
+	      # assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
+	      $fhs[$index]->{last_seq_id} = undef;
+	      $fhs[$index]->{last_line_1} = undef;
+	      $fhs[$index]->{last_line_2} = undef;
+	      last; # break free if the end of the alignment output was reached
+	    }
+	  }
+	  #  if ($fhs[$index]->{last_seq_id}){
+	  #    warn "Index: $index\tThis Seq-ID is $identifier, skipped all ambiguous sequences until the next ID which is: $fhs[$index]->{last_seq_id}\n";
+	  #  }
+	}
+	else{ # the next best alignment has a lower alignment score than the current read, so we can safely store the current alignment
+	  my $alignment_location;
+	  if ($position_1 <= $position_2){
+	    $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	  }
+	  elsif($position_2 < $position_1){
+	    $alignment_location = join(":",$chromosome_1,$position_2,$position_1);
+	  }
+	  ### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	  ### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+	  ### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+	  ### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB
+	  unless (exists $alignments{$alignment_location}){
+	    $alignments{$alignment_location}->{seq_id} = $id_1;
+	    $alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
+	    $alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
+	    $alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
+	    $alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
+	    $alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
+	    $alignments{$alignment_location}->{index} = $index;
+	    $alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
+	    $alignments{$alignment_location}->{position_1} = $position_1;
+	    $alignments{$alignment_location}->{position_2} = $position_2;
+	    $alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
+	    $alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
+	    $alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
+	    $alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
+	    $alignments{$alignment_location}->{flag_1} = $flag_1;
+	    $alignments{$alignment_location}->{flag_2} = $flag_2;
+	  }
+	  # warn "added best of several alignments to \%alignments hash\n";
+	  ### now reading and discarding all (inferior) alignments of this read pair until we hit the next sequence
+	  until ($fhs[$index]->{last_seq_id} ne $identifier){
+	    my $newline_1 = $fhs[$index]->{fh}-> getline();
+	    my $newline_2 = $fhs[$index]->{fh}-> getline();
+	    if ($newline_1 and $newline_2){
+	      chomp $newline_1;
+	      chomp $newline_2;
+	      my ($seq_id_1) = split (/\t/,$newline_1);
+	      my ($seq_id_2) = split (/\t/,$newline_2);
+	      $seq_id_1 =~ s/\/1$//;
+	      $seq_id_2 =~ s/\/2$//;
+	      # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+	      $fhs[$index]->{last_seq_id} = $seq_id_1;
+	      $fhs[$index]->{last_line_1} = $newline_1;
+	      $fhs[$index]->{last_line_2} = $newline_2;
+	    }
+	    else{
+	      # assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
+	      $fhs[$index]->{last_seq_id} = undef;
+	      $fhs[$index]->{last_line_1} = undef;
+	      $fhs[$index]->{last_line_2} = undef;
+	      last; # break free if the end of the alignment output was reached
+	    }
+	  }
+	  # if($fhs[$index]->{last_seq_id}){
+	  #   warn "Index: $index\tThis Seq-ID is $identifier, skipped all other alignments until the next ID was reached which is: $fhs[$index]->{last_seq_id}\n";
+	  # }
+	}
+}
+else{ # there is no second best hit, so we can just store this one and read in the next sequence
+	my $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
+	# print "$alignment_location\n";
+	### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
+	### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
+	### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
+	### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB
+	unless (exists $alignments{$alignment_location}){
+	  $alignments{$alignment_location}->{seq_id} = $id_1;
+	  $alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
+	  $alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
+	  $alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
+	  $alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
+	  $alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
+	  $alignments{$alignment_location}->{index} = $index;
+	  $alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
+	  $alignments{$alignment_location}->{position_1} = $position_1;
+	  $alignments{$alignment_location}->{position_2} = $position_2;
+	  $alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
+	  $alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
+	  $alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
+	  $alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
+	  $alignments{$alignment_location}->{flag_1} = $flag_1;
+	  $alignments{$alignment_location}->{flag_2} = $flag_2;
+	}
+	# warn "added unique alignment to \%alignments hash\n";
+	# Now reading and storing the next read pair
+	my $newline_1 = $fhs[$index]->{fh}-> getline();
+	my $newline_2 = $fhs[$index]->{fh}-> getline();
+	if ($newline_1 and $newline_2){
+	  chomp $newline_1;
+	  chomp $newline_2;
+	  # print "$newline_1\n";
+	  # print "$newline_2\n";
+	  my ($seq_id_1) = split (/\t/,$newline_1);
+	  my ($seq_id_2) = split (/\t/,$newline_2);
+	  $seq_id_1 =~ s/\/1$//;
+	  $seq_id_2 =~ s/\/2$//;
+	  # print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";
+	  $fhs[$index]->{last_seq_id} = $seq_id_1;
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+	  if ($seq_id_1 eq $identifier){
+	    die "Sequence with ID $identifier did not have a second best alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
+	  }
+	}
+	else{
+	  # assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
+	  $fhs[$index]->{last_seq_id} = undef;
+	  $fhs[$index]->{last_line_1} = undef;
+	  $fhs[$index]->{last_line_2} = undef;
+	}
+}
+}
+}
+### if the read produced several ambiguous alignments for a single instance of Bowtie 2 we can return already now. If --ambiguous was specified the read sequence will be printed out in FastQ format
+if ($alignment_ambiguous == 1){
+$counting{unsuitable_sequence_count}++;
+### report that the sequence pair has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+#  my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+#  my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+#  print "$ambiguous_read_1\n";
+#  print "$ambiguous_read_2\n";
+if ($ambiguous){
+return 2; # => exits to next sequence pair, and prints it out to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
+}
+elsif ($unmapped){
+return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
+}
+else{
+return 0;
+}
+}
+### if no alignment was found for a certain sequence at all we continue with the next sequence in the sequence file
+unless (%alignments){
+$counting{no_single_alignment_found}++;
+# my $unmapped_read_1 = join("\t",$identifier.'/1','77','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+# my $unmapped_read_2 = join("\t",$identifier.'/2','141','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+# print "$unmapped_read_1\n";
+# print "$unmapped_read_2\n";
+if ($unmapped){
+return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_read_2.txt if '--unmapped' was specified
+}
+else{
+return 0;
+}
+}
+#######################################################################################################################################################
+### If the sequence pair was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
+### single best position we are going to store the alignment information in the $meth_call variable. If there are multiple hits with the same (highest)
+### alignment score we are discarding the sequence pair altogether.
+### For end-to-end alignments the maximum alignment score is 0, each mismatch receives a penalty of 6, and each gap receives penalties for opening (5)
+### and extending (3 per bp) the gap.
+#######################################################################################################################################################
+### Declaring an empty hash reference which will store all information we need for the methylation call
+my $methylation_call_params; # hash reference
+my $sequence_pair_fails = 0; # using $sequence_pair_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)
+### print contents of %alignments for debugging
+##  if (scalar keys %alignments >= 1){
+#     print "\n******\n";
+#     foreach my $alignment_location (sort {$a cmp $b} keys %alignments){
+#       print "Loc:  $alignment_location\n";
+#       print "ID:      $alignments{$alignment_location}->{seq_id}\n";
+#       print "AS_1:    $alignments{$alignment_location}->{alignment_score_1}\n";
+#       print "AS_2:    $alignments{$alignment_location}->{alignment_score_2}\n";
+#       print "Seq_1:   $alignments{$alignment_location}->{bowtie_sequence_1}\n";
+#       print "Seq_2:   $alignments{$alignment_location}->{bowtie_sequence_2}\n";
+#       print "Index    $alignments{$alignment_location}->{index}\n";
+#       print "Chr:     $alignments{$alignment_location}->{chromosome}\n";
+#       print "Pos_1:   $alignments{$alignment_location}->{position_1}\n";
+#       print "Pos_2:   $alignments{$alignment_location}->{position_2}\n";
+#       print "CIGAR_1: $alignments{$alignment_location}->{CIGAR_1}\n";
+#       print "CIGAR_2: $alignments{$alignment_location}->{CIGAR_2}\n";
+#       print "MD_1:    $alignments{$alignment_location}->{mismatch_info_1}\n";
+#       print "MD_2:    $alignments{$alignment_location}->{mismatch_info_2}\n";
+#       print "Flag 1:  $alignments{$alignment_location}->{flag_1}\n";
+#       print "Flag 2:  $alignments{$alignment_location}->{flag_2}\n";
+#    }
+#    print "\n******\n";
+#  }
+### if there is only 1 entry in the %alignments hash we accept it as the best alignment
+if (scalar keys %alignments == 1){
+for my $unique_best_alignment (keys %alignments){
+$methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$unique_best_alignment}->{bowtie_sequence_1};
+$methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$unique_best_alignment}->{bowtie_sequence_2};
+$methylation_call_params->{$identifier}->{chromosome}        = $alignments{$unique_best_alignment}->{chromosome};
+$methylation_call_params->{$identifier}->{position_1}        = $alignments{$unique_best_alignment}->{position_1};
+$methylation_call_params->{$identifier}->{position_2}        = $alignments{$unique_best_alignment}->{position_2};
+$methylation_call_params->{$identifier}->{index}             = $alignments{$unique_best_alignment}->{index};
+$methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$unique_best_alignment}->{alignment_score_1};
+$methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$unique_best_alignment}->{alignment_score_2};
+$methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$unique_best_alignment}->{sum_of_alignment_scores};
+$methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$unique_best_alignment}->{mismatch_info_1};
+$methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$unique_best_alignment}->{mismatch_info_2};
+$methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$unique_best_alignment}->{CIGAR_1};
+$methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$unique_best_alignment}->{CIGAR_2};
+$methylation_call_params->{$identifier}->{flag_1}            = $alignments{$unique_best_alignment}->{flag_1};
+$methylation_call_params->{$identifier}->{flag_2}            = $alignments{$unique_best_alignment}->{flag_2};
+}
+}
+### otherwise we are going to find out if there is a best match among the multiple alignments, or whether there are 2 or more equally good alignments (in which case
+### we boot the sequence pair altogether)
+elsif (scalar keys %alignments >= 2  and scalar keys %alignments <= 4){
+my $best_sum_of_alignment_scores;
+my $best_alignment_location;
+foreach my $alignment_location (sort {$alignments{$b}->{sum_of_alignment_scores} <=> $alignments{$a}->{sum_of_alignment_scores}} keys %alignments){
+# print "$alignments{$alignment_location}->{sum_of_alignment_scores}\n";
+unless (defined $best_sum_of_alignment_scores){
+	$best_sum_of_alignment_scores = $alignments{$alignment_location}->{sum_of_alignment_scores};
+	$best_alignment_location = $alignment_location;
+	# print "setting best alignment score to: $best_sum_of_alignment_scores\n";
+}
+else{
+	### if the second best alignment has the same sum of alignment scores as the first one, the sequence pair will get booted
+	if ($alignments{$alignment_location}->{sum_of_alignment_scores} == $best_sum_of_alignment_scores){
+	  # warn "Same sum of alignment scores for 2 different alignments, the sequence pair will get booted!\n";
+	  $sequence_pair_fails = 1;
+	  last; # exiting since we know that the sequence has ambiguous alignments
+	}
+	### else we are going to store the best alignment for further processing
+	else{
+	  $methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$best_alignment_location}->{bowtie_sequence_1};
+	  $methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$best_alignment_location}->{bowtie_sequence_2};
+	  $methylation_call_params->{$identifier}->{chromosome}        = $alignments{$best_alignment_location}->{chromosome};
+	  $methylation_call_params->{$identifier}->{position_1}        = $alignments{$best_alignment_location}->{position_1};
+	  $methylation_call_params->{$identifier}->{position_2}        = $alignments{$best_alignment_location}->{position_2};
+	  $methylation_call_params->{$identifier}->{index}             = $alignments{$best_alignment_location}->{index};
+	  $methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$best_alignment_location}->{alignment_score_1};
+	  $methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$best_alignment_location}->{alignment_score_2};
+	  $methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$best_alignment_location}->{sum_of_alignment_scores};
+	  $methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$best_alignment_location}->{mismatch_info_1};
+	  $methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$best_alignment_location}->{mismatch_info_2};
+	  $methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$best_alignment_location}->{CIGAR_1};
+	  $methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$best_alignment_location}->{CIGAR_2};
+	  $methylation_call_params->{$identifier}->{flag_1}            = $alignments{$best_alignment_location}->{flag_1};
+	  $methylation_call_params->{$identifier}->{flag_2}            = $alignments{$best_alignment_location}->{flag_2};
+	  last; # exiting since the sequence produced a unique best alignment
+	}
+}
+}
+}
+else{
+die "There are too many potential hits for this sequence pair (1-4 expected, but found: '",scalar keys %alignments,"')\n";;
+}
+### skipping the sequence completely if there were multiple alignments with the same best sum of alignment scores at different positions
+if ($sequence_pair_fails == 1){
+$counting{unsuitable_sequence_count}++;
+### report that the sequence has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
+# my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
+# my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
+# print "$ambiguous_read_1\n";
+# print "$ambiguous_read_2\n";
+if ($ambiguous){
+return 2; # => exits to next sequence pair, and prints it out (in FastQ format) to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
+}
+elsif ($unmapped){
+return 1; # => exits to next sequence pair, and prints it out (in FastQ format) to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
+}
+else{
+return 0; # => exits to next sequence pair (default)
+}
+}
+### --DIRECTIONAL
+### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
+### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
+if ($directional){
+if ( ($methylation_call_params->{$identifier}->{index} == 1) or ($methylation_call_params->{$identifier}->{index} == 2) ){
+#    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
+$counting{alignments_rejected_count}++;
+return 0;
+}
+}
+### If the sequence pair has not been rejected so far it does have a unique best alignment
+$counting{unique_best_alignment_count}++;
+extract_corresponding_genomic_sequence_paired_ends_bowtie2($identifier,$methylation_call_params);
+### check to see if the genomic sequences we extracted has the same length as the observed sequences +2, and only then we perform the methylation call
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_1}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
+warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_2}\n";
+$counting{genomic_sequence_could_not_be_extracted_count}++;
+return 0;
+}
+### now we are set to perform the actual methylation call
+$methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
+$methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
+# print "$methylation_call_params->{$identifier}->{read_conversion_2}\n";
+# print "  $sequence_2\n";
+# print "$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}\n";
+# print "  $methylation_call_params->{$identifier}->{methylation_call_2}\n";
+print_bisulfite_mapping_results_paired_ends_bowtie2($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2);
+return 0; ## otherwise 1 will be returned by default, which would print the sequence pair to unmapped_1 and _2
+}
+### DECIDE WHETHER PAIRED-END ALIGNMENT IS VALID: looks at the alignment pair buffered in $fhs[$index] (last_line_1 and last_line_2). Returns 1 if the buffered pair belongs to $identifier and ensure_sensical_alignment_orientation_paired_ends() reports its orientation as 1; in every other case returns 0. While doing so it may read up to two further pairs of lines from $fhs[$index]->{fh} and overwrite last_seq_id/last_line_1/last_line_2 (all three are set to undef once the filehandle yields no more lines). Dies on read IDs without a /1 tag, on a 3rd consecutive pair with the same ID, or on an orientation value other than 0 or 1
+sub decide_whether_paired_end_alignment_is_valid{
+my ($index,$identifier) = @_; # $index: element of @fhs to work on; $identifier: sequence ID the buffered pair is compared against
+my ($id_1,$strand_1,$mapped_chromosome_1,$position_1,$bowtie_sequence_1,$mismatch_info_1) = (split (/\t/,$fhs[$index]->{last_line_1},-1))[0,1,2,3,4,7]; # only the ID and the strand are used further down in this sub
+my ($id_2,$strand_2,$mapped_chromosome_2,$position_2,$bowtie_sequence_2,$mismatch_info_2) = (split (/\t/,$fhs[$index]->{last_line_2},-1))[0,1,2,3,4,7]; # same fields, taken from the second buffered line
+chomp $mismatch_info_1;
+chomp $mismatch_info_2;
+my $seq_id_1 = $id_1;
+my $seq_id_2 = $id_2;
+$seq_id_1 =~ s/\/1$//; # removing the read /1 tag (an ID ending on /2 is left untouched)
+$seq_id_2 =~ s/\/1$//; # also stripping /1 here (not /2): presumably read 1 can be on either of the two lines - TODO confirm against the bowtie output order
+### ensuring that the current entry is the correct sequence (one of the two IDs has to equal $identifier once its /1 tag is removed)
+if ($seq_id_1 eq $identifier or $seq_id_2 eq $identifier){
+### checking the orientation of the alignment. We need to discriminate between 8 different conditions, however only 4 of them are theoretically
+### sensible alignments. The helper is handed the unmodified IDs (still carrying their read tags) and both strands
+my $orientation = ensure_sensical_alignment_orientation_paired_ends ($index,$id_1,$strand_1,$id_2,$strand_2);
+### If the orientation was correct we can move on; the buffered lines are left untouched
+if ($orientation == 1){
+return 1; ### 1st possibility for A SEQUENCE-PAIR TO PASS
+}
+### If the alignment was in the wrong orientation we need to read in two new lines
+elsif($orientation == 0){
+my $newline_1 = $fhs[$index]->{fh}->getline();
+my $newline_2 = $fhs[$index]->{fh}->getline();
+if ($newline_1 and $newline_2){
+	### extract the ID and strand again (from $newline_1 and $newline_2 this time); the lines are not chomped, only their first two fields are used
+	($id_1,$strand_1) = (split (/\t/,$newline_1))[0,1];
+	($id_2,$strand_2) = (split (/\t/,$newline_2))[0,1];
+	my $seqid; # will hold the ID of read 1 with its /1 tag removed
+	$seq_id_1 = $id_1;
+	$seq_id_2 = $id_2;
+	# we need to capture the first read (ending on /1); the substitution doubles as the test which of the two lines holds read 1
+	if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+	  $seqid = $seq_id_1;
+	}
+	elsif ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+	  $seqid = $seq_id_2;
+	}
+	else{
+	  die "One of the two reads needs to end on /1!!";
+	}
+	### ensuring that the next entry is still the correct sequence
+	if ($seq_id_1 eq $identifier or  $seq_id_2 eq $identifier){
+	  ### checking orientation again
+	  $orientation = ensure_sensical_alignment_orientation_paired_ends ($index,$id_1,$strand_1,$id_2,$strand_2);
+	  ### If the orientation was correct we can move on
+	  if ($orientation == 1){
+	    ### Writing the current sequence to last_line_1 and last_line_2 so that the caller finds the accepted pair in @fhs
+	    $fhs[$index]->{last_seq_id} = $seqid;
+	    $fhs[$index]->{last_line_1} = $newline_1;
+	    $fhs[$index]->{last_line_2} = $newline_2;
+	    return 1; ### 2nd possibility for a SEQUENCE-PAIR TO PASS
+	  }
+	  ### If the alignment was in the wrong orientation again we need to read in yet another 2 new lines and store them in @fhs (this must be
+	  ### the next entry, a 3rd pair with the same ID is treated as a fatal error below)
+	  elsif ($orientation == 0){
+	    $newline_1 = $fhs[$index]->{fh}->getline();
+	    $newline_2 = $fhs[$index]->{fh}->getline();
+	    if ($newline_1 and $newline_2){
+	      ($seq_id_1) = split (/\t/,$newline_1);
+	      ($seq_id_2) = split (/\t/,$newline_2);
+	      $seqid = '';
+	      if ($seq_id_1 =~ s/\/1$//){ # removing the read /1 tag
+		$seqid = $seq_id_1;
+	      }
+	      elsif ($seq_id_2 =~ s/\/1$//){ # removing the read /1 tag
+		$seqid = $seq_id_2;
+	      }
+	      else{
+		die "One of the two reads needs to end on /1!!";
+	      }
+	      ### check if the next 2 lines still have the same seq ID (must not happen), and if not overwrite the current seq-ID and bowtie output with
+	      ### the same fields of the just read next entry
+	      die "Same seq ID 3 or more times in a row!(should be 2 max)" if ($seqid eq $identifier);
+	      $fhs[$index]->{last_seq_id} = $seqid;
+	      $fhs[$index]->{last_line_1} = $newline_1;
+	      $fhs[$index]->{last_line_2} = $newline_2;
+	      return 0; # not processing anything this round as both alignments seen for $identifier were in the wrong orientation; the next entry is now buffered
+	    }
+	    else {
+	      ### assigning undef to last_seq_id and last_line (end of bowtie output)
+	      $fhs[$index]->{last_seq_id} = undef;
+	      $fhs[$index]->{last_line_1} = undef;
+	      $fhs[$index]->{last_line_2} = undef;
+	      return 0; # not processing anything as both alignments seen for $identifier were in the wrong orientation
+	    }
+	  }
+	  else{
+	    die "The orientation of the alignment must be either correct or incorrect\n";
+	  }
+	}
+	### the sequence pair we just read in is already the next sequence pair to be analysed -> store it in @fhs
+	else{
+	  $fhs[$index]->{last_seq_id} = $seqid;
+	  $fhs[$index]->{last_line_1} = $newline_1;
+	  $fhs[$index]->{last_line_2} = $newline_2;
+	  return 0; # processing the new alignment result only in the next round
+	}
+}
+else {
+	# assigning undef to last_seq_id and both last_lines (end of bowtie output)
+	$fhs[$index]->{last_seq_id} = undef;
+	$fhs[$index]->{last_line_1} = undef;
+	$fhs[$index]->{last_line_2} = undef;
+	return 0; # not processing anything as the alignment currently stored in last_line_1 and _2 was in the wrong orientation
+}
+}
+else{
+die "The orientation of the alignment must be either correct or incorrect\n";
+}
+}
+### the sequence pair stored in @fhs as last_line_1 and last_line_2 is already the next sequence pair to be analysed -> analyse next round (nothing is read or changed here)
+else{
+return 0;
+}
+}
+### EXTRACT GENOMIC SEQUENCE | BOWTIE 1 | PAIRED-END
+### For the read pair $sequence_identifier this sub cuts the unconverted genomic sequence underneath both alignments out of %chromosomes
+### (alignment length + 2 extra bases, so that a CpG, CHG or CHH call is also possible for a C at the very edge of a read) and stores it,
+### together with the strand and conversion 'memory' needed for the methylation call, in $methylation_call_params->{$sequence_identifier}.
+### The stored {index} (0-3) selects which of the 4 possible bisulfite strands the pair came from; any other value is fatal.
+### A genomic sequence is stored as '' whenever the boundary check for its 2 extra bases fails.
+sub extract_corresponding_genomic_sequence_paired_ends {
+    my ($sequence_identifier,$methylation_call_params) = @_;
+
+    ### shortcut to the hash holding everything known about this read pair (the entry is brought into existence if it is not there yet)
+    my $pair = \%{ $methylation_call_params->{$sequence_identifier} };
+    my $strand_index = $pair->{index};
+
+    my ($orientation_read_1,$orientation_read_2);
+    my ($conversion_read_1,$conversion_read_2,$conversion_genome);
+
+    ### Step 1: translate the index into strand orientation and conversion types, and count the strand
+    if ($strand_index == 0){
+        ### CT converted read 1 plus GA converted read 2 vs. CT converted genome (+/-): original (converted) forward strand
+        $counting{CT_GA_CT_count}++;
+        ($orientation_read_1,$orientation_read_2) = ('+','-');
+        ($conversion_read_1,$conversion_read_2,$conversion_genome) = ('CT','GA','CT');
+    }
+    elsif ($strand_index == 1){
+        ### GA converted read 1 plus CT converted read 2 vs. GA converted genome (+/-): complementary to (converted) reverse strand
+        $counting{GA_CT_GA_count}++;
+        ($orientation_read_1,$orientation_read_2) = ('+','-');
+        ($conversion_read_1,$conversion_read_2,$conversion_genome) = ('GA','CT','GA');
+    }
+    elsif ($strand_index == 2){
+        ### GA converted read 1 plus CT converted read 2 vs. CT converted genome (-/+): complementary to (converted) forward strand
+        $counting{GA_CT_CT_count}++;
+        ($orientation_read_1,$orientation_read_2) = ('-','+');
+        ($conversion_read_1,$conversion_read_2,$conversion_genome) = ('GA','CT','CT');
+    }
+    elsif ($strand_index == 3){
+        ### CT converted read 1 plus GA converted read 2 vs. GA converted genome (-/+): original (converted) reverse strand
+        $counting{CT_GA_GA_count}++;
+        ($orientation_read_1,$orientation_read_2) = ('-','+');
+        ($conversion_read_1,$conversion_read_2,$conversion_genome) = ('CT','GA','GA');
+    }
+    else{
+        die "Too many bowtie result filehandles\n";
+    }
+
+    ### Step 2: work out the geometry of the two genomic windows
+    ### - alignments against the GA converted genome get their 2 extra bases in front of the start position, all others behind the end
+    ### - bowtie reports the + alignment first (fields ..._1) and the - alignment second (fields ..._2). If read 1 is the - alignment
+    ###   the two alignments have to be swapped so that genomic sequence 1 still belongs to read 1
+    ### - the window taken from the second (-) alignment is always reverse complemented
+    my $extra_bases_in_front = ($conversion_genome eq 'GA');
+    my $read_1_is_minus      = ($orientation_read_1 eq '-');
+    my $chromosome           = $pair->{chromosome};
+
+    my %plus_alignment  = (start => $pair->{start_seq_1}, aligned_length => length($pair->{bowtie_sequence_1}), revcomp => 0);
+    my %minus_alignment = (start => $pair->{start_seq_2}, aligned_length => length($pair->{bowtie_sequence_2}), revcomp => 1);
+
+    ### cuts one window out of the chromosome; if $check_boundary is set the window is only extracted when the 2 extra bases
+    ### do not run over the relevant end of the chromosome, otherwise '' is handed back
+    my $genomic_window = sub {
+        my ($alignment,$check_boundary) = @_;
+        if ($check_boundary){
+            my $within_chromosome;
+            if ($extra_bases_in_front){
+                $within_chromosome = ($alignment->{start}-1 > 0);
+            }
+            else{
+                $within_chromosome = (length($chromosomes{$chromosome}) > $alignment->{start}+$alignment->{aligned_length}+1);
+            }
+            return '' unless $within_chromosome;
+        }
+        my $window;
+        if ($extra_bases_in_front){
+            $window = substr ($chromosomes{$chromosome},$alignment->{start}-2,$alignment->{aligned_length}+2);
+        }
+        else{
+            $window = substr ($chromosomes{$chromosome},$alignment->{start},$alignment->{aligned_length}+2);
+        }
+        ### the reverse strand sequence needs to be reverse complemented
+        $window = reverse_complement($window) if $alignment->{revcomp};
+        return $window;
+    };
+
+    ### Step 3: extract both windows. The boundary is only checked for the window that could run over the chromosome edge:
+    ### genomic sequence 1 if the extra bases are in front, genomic sequence 2 if they are behind the end
+    my ($source_for_1,$source_for_2) = $read_1_is_minus ? (\%minus_alignment,\%plus_alignment) : (\%plus_alignment,\%minus_alignment);
+    my $genomic_sequence_1 = $genomic_window->($source_for_1, $extra_bases_in_front);
+    my $genomic_sequence_2 = $genomic_window->($source_for_2, !$extra_bases_in_front);
+
+    ### Step 4: hand everything over to the methylation call parameters.
+    ### the alignment_read information tells which strand of the genomic sequence the read is compared against,
+    ### the read_conversion information tells whether to look for C->T or G->A substitutions
+    $pair->{alignment_read_1}  = $orientation_read_1;
+    $pair->{alignment_read_2}  = $orientation_read_2;
+    $pair->{genome_conversion} = $conversion_genome;
+    $pair->{read_conversion_1} = $conversion_read_1;
+    $pair->{read_conversion_2} = $conversion_read_2;
+    $pair->{unmodified_genomic_sequence_1} = $genomic_sequence_1;
+    $pair->{unmodified_genomic_sequence_2} = $genomic_sequence_2;
+}
+### EXTRACT GENOMIC SEQUENCE BOWTIE 2 | PAIRED-END
+###
+### For both reads of a Bowtie 2 paired-end alignment this rebuilds the unconverted genomic sequence underneath the read by walking
+### its CIGAR string, and adds 2 extra genomic bases so that a C at the very edge of a read can still be placed in CpG, CHG or CHH
+### context for the methylation call.
+###
+### Arguments:
+###   $sequence_identifier      key of the read pair in $methylation_call_params
+###   $methylation_call_params  hash reference; the entry of the read pair supplies CIGAR_1, CIGAR_2, position_1, position_2
+###                             (1-based SAM positions), chromosome and index (0-3)
+### Stored in the entry of the read pair on success:
+###   alignment_read_1/2, read_conversion_1/2, genome_conversion, unmodified_genomic_sequence_1/2, end_position_1/2, indels_1/2
+### Early exit (bare return) if the 2 extra bases of a read would fall off the chromosome:
+###   only unmodified_genomic_sequence_N of the offending read is stored (empty if the bases in front of the read are missing,
+###   the sequence without the 2 extra bases if the bases behind the read are missing); nothing else is written.
+### Dies if a CIGAR string has unequal numbers of lengths and operations, contains an operation other than M, I or D,
+### or if index is not 0, 1, 2 or 3.
+sub extract_corresponding_genomic_sequence_paired_ends_bowtie2{
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  ### A bisulfite sequence pair for 1 location in the genome can theoretically be on any of the 4 possible converted strands. We are also giving the
+  ### sequence a 'memory' of the conversion we are expecting which we will need later for the methylation call
+  ### Everything about this read pair lives in one hash; taking the reference this way creates the entry if it is missing, just like nested access would
+  my $read_pair  = \%{ $methylation_call_params->{$sequence_identifier} };
+  my $index      = $read_pair->{index};
+  my $chromosome = $read_pair->{chromosome};
+  my $cigar_1    = $read_pair->{CIGAR_1};
+  my $cigar_2    = $read_pair->{CIGAR_2};
+  ### Positions in SAM format are 1 based, so we need to subtract 1 when getting substrings
+  my $pos_1 = $read_pair->{position_1}-1;
+  my $pos_2 = $read_pair->{position_2}-1;
+  # parsing CIGAR 1 string
+  my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
+  my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
+  shift @ops_1; # remove the empty first element
+  die "CIGAR 1 string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
+  # parsing CIGAR 2 string
+  my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
+  my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
+  shift @ops_2; # remove the empty first element
+  die "CIGAR 2 string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
+  ### Extracting read 1 genomic sequence ###
+  my ($complete_1,$non_bisulfite_sequence_1,$end_pos_1,$indels_1) = _extract_genomic_sequence_following_cigar(1,$chromosome,$index,$pos_1,$cigar_1,\@len_1,\@ops_1);
+  unless ($complete_1){ # right at the edge of a chromosome, the pair can't be used
+    $read_pair->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+    return;
+  }
+  ### Extracting read 2 genomic sequence ###
+  my ($complete_2,$non_bisulfite_sequence_2,$end_pos_2,$indels_2) = _extract_genomic_sequence_following_cigar(2,$chromosome,$index,$pos_2,$cigar_2,\@len_2,\@ops_2);
+  unless ($complete_2){ # right at the edge of a chromosome; NOTE: unmodified_genomic_sequence_1 is deliberately not written in this case either
+    $read_pair->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+    return;
+  }
+  ### all paired-end alignments reported by Bowtie 2 have the Read 1 alignment first and the Read 2 alignment as the second one irrespective of whether read 1 or read 2 was
+  ### the + alignment. We also read in sequences read 1 then read 2 so they should correspond perfectly
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  my ($alignment_read_1,$alignment_read_2,$read_conversion_info_1,$read_conversion_info_2,$genome_conversion);
+  ### results from CT converted read 1 plus GA converted read 2 vs. CT converted genome (+/- orientation alignments are reported only)
+  if ($index == 0){
+    ### [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_GA_CT_count}++;
+    ($alignment_read_1,$alignment_read_2) = ('+','-');
+    ($read_conversion_info_1,$read_conversion_info_2,$genome_conversion) = ('CT','GA','CT');
+    ### Read 1 is the forward hit, read 2 is on the reverse strand so its genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
+  }
+  ### results from GA converted read 1 plus CT converted read 2 vs. GA converted genome (+/- orientation alignments are reported only)
+  elsif ($index == 1){
+    ### [Index 1, sequence originated from complementary to (converted) bottom strand]
+    $counting{GA_CT_GA_count}++;
+    ($alignment_read_1,$alignment_read_2) = ('+','-');
+    ($read_conversion_info_1,$read_conversion_info_2,$genome_conversion) = ('GA','CT','GA');
+    ### Read 1 is the forward hit, read 2 is on the reverse strand so its genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
+  }
+  ### results from GA converted read 1 plus CT converted read 2 vs. CT converted genome (-/+ orientation alignments are reported only)
+  elsif ($index == 2){
+    ### [Index 2, sequence originated from the complementary to (converted) top strand]
+    $counting{GA_CT_CT_count}++;
+    ($alignment_read_1,$alignment_read_2) = ('-','+');
+    ($read_conversion_info_1,$read_conversion_info_2,$genome_conversion) = ('GA','CT','CT');
+    ### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
+  }
+  ### results from CT converted read 1 plus GA converted read 2 vs. GA converted genome (-/+ orientation alignments are reported only)
+  elsif ($index == 3){
+    ### [Index 3, sequence originated from the (converted) reverse strand]
+    $counting{CT_GA_GA_count}++;
+    ($alignment_read_1,$alignment_read_2) = ('-','+');
+    ($read_conversion_info_1,$read_conversion_info_2,$genome_conversion) = ('CT','GA','GA');
+    ### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
+    $non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
+  }
+  else{
+    die "Too many bowtie result filehandles\n";
+  }
+  $read_pair->{alignment_read_1} = $alignment_read_1;
+  $read_pair->{alignment_read_2} = $alignment_read_2;
+  $read_pair->{genome_conversion} = $genome_conversion;
+  $read_pair->{read_conversion_1} = $read_conversion_info_1;
+  $read_pair->{read_conversion_2} = $read_conversion_info_2;
+  $read_pair->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
+  $read_pair->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
+  ## the end position of a read is the position reached after walking its CIGAR string
+  $read_pair->{end_position_1} = $end_pos_1;
+  $read_pair->{end_position_2} = $end_pos_2;
+  $read_pair->{indels_1} = $indels_1;
+  $read_pair->{indels_2} = $indels_2;
+}
+### Private helper of extract_corresponding_genomic_sequence_paired_ends_bowtie2: rebuilds the genomic sequence for ONE read of the pair
+###
+### Arguments: $read_number (1 or 2, only used in error messages), $chromosome (key into %chromosomes), $index (0-3),
+###            $pos (0-based leftmost position of the read), $cigar (only used for error reporting), and array references to
+###            the already parsed CIGAR lengths and operations
+### Returns:   ($complete,$genomic_sequence,$end_pos,$indels)
+###            $complete is 0 if the 2 extra bases could not be taken because they lie beyond the chromosome, 1 otherwise;
+###            $genomic_sequence holds whatever was assembled up to that point
+### Dies if an operation other than M, I or D is encountered
+sub _extract_genomic_sequence_following_cigar{
+  my ($read_number,$chromosome,$index,$pos,$cigar,$lengths,$operations) = @_;
+  my $genomic_sequence = '';
+  my $indels = 0; # adding these to the hemming distance value (needed for the NM field in the final SAM output)
+  ### for indexes 1 and 3 the 2 extra bases are taken in front of the read, for indexes 0 and 2 behind it
+  if ( ($index == 1) or ($index == 3) ){
+    # offset 0 is a perfectly valid substring start, so the test is >= 0 for read 1 and read 2 alike
+    return (0,$genomic_sequence,$pos,$indels) unless ( ($pos-2) >= 0);
+    $genomic_sequence .= substr ($chromosomes{$chromosome},$pos-2,2);
+  }
+  foreach my $op_number (0..$#{$lengths}){
+    my $length    = $lengths->[$op_number];
+    my $operation = $operations->[$op_number];
+    if ($operation eq 'M'){
+      # extracting genomic sequence and adjusting the position
+      $genomic_sequence .= substr ($chromosomes{$chromosome},$pos,$length);
+      $pos += $length;
+    }
+    elsif ($operation eq 'I'){ # insertion in the read sequence
+      # we simply add padding Ns instead of finding genomic sequence. This will not be used to infer methylation calls
+      # the position doesn't need adjusting
+      $genomic_sequence .= 'N' x $length;
+      $indels += $length;
+    }
+    elsif ($operation eq 'D'){ # deletion in the read sequence
+      # we do not add any genomic sequence but only adjust the position
+      $pos += $length;
+      $indels += $length;
+    }
+    elsif ($cigar =~ tr/NSHPX=//){ # tr takes a plain list of characters (no [] class) and returns how many of them it found
+      die "The CIGAR $read_number string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+    else{
+      die "The CIGAR $read_number string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+  }
+  if ( ($index == 0) or ($index == 2) ){
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    return (0,$genomic_sequence,$pos,$indels) unless (length($chromosomes{$chromosome}) >= $pos+2);
+    $genomic_sequence .= substr ($chromosomes{$chromosome},$pos,2);
+  }
+  return (1,$genomic_sequence,$pos,$indels);
+}
+##########################################
+### PRINT SINGLE END RESULTS: Bowtie 1 ###
+##########################################
+### Writes one single-end Bowtie 1 alignment together with its methylation call.
+### Arguments: $identifier (key of the read in $methylation_call_params), $sequence, $methylation_call_params (hash reference)
+###            and $quality_value (converted to Phred33 first if $phred64 or $solexa is set)
+### Side effect: the stored start position of the read is increased by 1 (permanently)
+### Output: a tab-separated line on OUT if $vanilla is set, otherwise the read is handed to single_end_SAM_output()
+sub print_bisulfite_mapping_result_single_end{
+  my ($identifier,$sequence,$methylation_call_params,$quality_value)= @_;
+  ### the FastQ quality is always reported in Sanger encoding (Phred 33 scale)
+  $quality_value = $phred64 ? convert_phred64_quals_to_phred33($quality_value)
+                 : $solexa  ? convert_solexa_quals_to_phred33($quality_value)
+                 :            $quality_value;
+  ### Bowtie 1 reports the index and not the bp position, so the start position of single-end reads is shifted by +1 bp
+  $methylation_call_params->{$identifier}->{position} += 1;
+  ### SAM output unless the vanilla format was requested (done at the end of the script)
+  return single_end_SAM_output($identifier,$sequence,$methylation_call_params,$quality_value) unless ($vanilla);
+  ### vanilla format: every uniquely mapped read and its methylation call on one tab-delimited line
+  my $read = $methylation_call_params->{$identifier};
+  my @columns = (
+    $identifier,
+    @{$read}{qw(alignment_strand chromosome position end_position)},
+    $sequence,
+    @{$read}{qw(unmodified_genomic_sequence methylation_call read_conversion genome_conversion)},
+    $quality_value,
+  );
+  print OUT join("\t",@columns) . "\n";
+}
+##########################################
+### PRINT SINGLE END RESULTS: Bowtie 2 ###
+##########################################
+### Hands one single-end Bowtie 2 alignment and its methylation call to single_end_SAM_output().
+### Arguments: $identifier, $sequence, $methylation_call_params (hash reference) and $quality_value
+###            (converted to Phred33 first if $phred64 or $solexa is set)
+sub print_bisulfite_mapping_result_single_end_bowtie2{
+  my ($identifier,$sequence,$methylation_call_params,$quality_value)= @_;
+  ### the FastQ quality is always reported in Sanger encoding (Phred 33 scale)
+  $quality_value = $phred64 ? convert_phred64_quals_to_phred33($quality_value)
+                 : $solexa  ? convert_solexa_quals_to_phred33($quality_value)
+                 :            $quality_value;
+  ### every mapped read and its methylation call goes to the SAM output file (unmapped and ambiguous reads were already printed)
+  return single_end_SAM_output($identifier,$sequence,$methylation_call_params,$quality_value); # at the end of the script
+}
+##########################################
+### PRINT PAIRED END RESULTS: Bowtie 1 ###
+##########################################
+### Writes one paired-end Bowtie 1 alignment together with the methylation calls of both reads.
+### Arguments: $identifier (key of the read pair in $methylation_call_params), $sequence_1, $sequence_2,
+###            $methylation_call_params (hash reference), $quality_value_1 and $quality_value_2
+###            (both converted to Phred33 first if $phred64 or $solexa is set)
+### Side effect: the stored start_seq_1 of the pair is increased by 1 (permanently)
+### Output: a tab-separated line on OUT if $vanilla is set, otherwise the pair is handed to paired_end_SAM_output()
+sub print_bisulfite_mapping_results_paired_ends{
+  my ($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2)= @_;
+  ### the FastQ qualities are always reported in Sanger encoding (Phred 33 scale)
+  if ($phred64 or $solexa){
+    my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33 : \&convert_solexa_quals_to_phred33;
+    $_ = $to_phred33->($_) foreach ($quality_value_1,$quality_value_2);
+  }
+  ### Bowtie 1 reports the index and not the bp position, so the start position is shifted by +1 bp (the end position is already 1-based)
+  $methylation_call_params->{$identifier}->{start_seq_1} += 1;
+  ### SAM output unless the vanilla format was requested (done at the end of the script)
+  return paired_end_SAM_output($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2) unless ($vanilla);
+  ### vanilla format: every aligned read pair and its methylation calls on one tab-delimited line
+  my $pair = $methylation_call_params->{$identifier};
+  my @columns = (
+    $identifier,
+    @{$pair}{qw(alignment_read_1 chromosome start_seq_1 alignment_end)},
+    $sequence_1,
+    @{$pair}{qw(unmodified_genomic_sequence_1 methylation_call_1)},
+    $sequence_2,
+    @{$pair}{qw(unmodified_genomic_sequence_2 methylation_call_2 read_conversion_1 genome_conversion)},
+    $quality_value_1,
+    $quality_value_2,
+  );
+  print OUT join("\t",@columns) . "\n";
+}
+##########################################
+### PRINT PAIRED END RESULTS: Bowtie 2 ###
+##########################################
+### Hands one paired-end Bowtie 2 alignment and its methylation calls to paired_end_SAM_output().
+### Arguments: $identifier, $sequence_1, $sequence_2, $methylation_call_params (hash reference), $quality_value_1 and
+###            $quality_value_2 (both converted to Phred33 first if $phred64 or $solexa is set)
+sub print_bisulfite_mapping_results_paired_ends_bowtie2{
+  my ($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2)= @_;
+  ### the FastQ qualities are always reported in Sanger encoding (Phred 33 scale)
+  if ($phred64 or $solexa){
+    my $to_phred33 = $phred64 ? \&convert_phred64_quals_to_phred33 : \&convert_solexa_quals_to_phred33;
+    $_ = $to_phred33->($_) foreach ($quality_value_1,$quality_value_2);
+  }
+  ### every aligned read pair and its methylation calls go to the output file (unmapped and ambiguous reads were already printed)
+  return paired_end_SAM_output($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2); # at the end of the script
+}
+### Re-encodes a Phred64 quality string as a Phred33 quality string, one character at a time
+### Argument: the Phred64 encoded quality string; Returns: the Phred33 encoded quality string of the same length
+sub convert_phred64_quals_to_phred33{
+  my ($phred64_quality) = @_;
+  my $phred33_quality = '';
+  foreach my $quality_character (split (//,$phred64_quality)){
+    my $phred_score = convert_phred64_quality_string_into_phred_score ($quality_character);
+    $phred33_quality .= convert_phred_score_into_phred33_quality_string ($phred_score);
+  }
+  return $phred33_quality;
+}
+### Re-encodes a Solexa (pre 1.3) quality string as a Phred33 quality string, one character at a time
+### Argument: the Solexa encoded quality string; Returns: the Phred33 encoded quality string of the same length
+sub convert_solexa_quals_to_phred33{
+  my ($solexa_quality) = @_;
+  my $phred33_quality = '';
+  foreach my $quality_character (split (//,$solexa_quality)){
+    my $phred_score = convert_solexa_pre1_3_quality_string_into_phred_score ($quality_character);
+    $phred33_quality .= convert_phred_score_into_phred33_quality_string ($phred_score);
+  }
+  return $phred33_quality;
+}
+### Turns a numeric Phred score into its Phred33 quality character (the character with code: score + 33)
+sub convert_phred_score_into_phred33_quality_string{
+  my ($phred_score) = @_;
+  return chr($phred_score+33);
+}
+### Turns a Phred64 quality character into its numeric Phred score (character code - 64)
+sub convert_phred64_quality_string_into_phred_score{
+  my ($quality_character) = @_;
+  return ord($quality_character)-64;
+}
+### Turns a Solexa (pre 1.3) quality character into a numeric score (character code - 59)
+### We will just use 59 as the offset here as all Phred Scores between 10 and 40 look exactly the same, there is only a minute
+### difference for values between 0 and 10
+sub convert_solexa_pre1_3_quality_string_into_phred_score{
+  my ($quality_character) = @_;
+  return ord($quality_character)-59;
+}
+### EXTRACT GENOMIC SEQUENCE BOWTIE 1 | SINGLE-END
+### Looks up the unconverted genomic sequence a single-end Bowtie 1 alignment corresponds to (+2 extra bases, so that a C at the
+### last position of the read can still be placed in CpG, CHG or CHH context) and gives the read a 'memory' of its strand and conversion.
+### Arguments: $sequence_identifier (key of the read in $methylation_call_params) and $methylation_call_params (hash reference);
+###            the entry of the read supplies index (0-3), chromosome, position and bowtie_sequence
+### Stored in the entry of the read: alignment_strand, read_conversion, genome_conversion, unmodified_genomic_sequence
+###            (empty string if the 2 extra bases would lie beyond the chromosome) and end_position
+### Dies if index is not 0, 1, 2 or 3
+sub extract_corresponding_genomic_sequence_single_end {
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  ### taking the reference this way creates the entry of the read if it is missing, just like nested access would
+  my $read  = \%{ $methylation_call_params->{$sequence_identifier} };
+  my $index = $read->{index};
+  ### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands
+  ### columns: counter, alignment strand, read conversion, genome conversion, extra bases in front of the read?, reverse complement?
+  my $strand_settings =
+      $index == 0 ? ['CT_CT_count','+','CT','CT',0,0]  # CT converted read vs. CT converted genome, from (converted) forward strand
+    : $index == 1 ? ['CT_GA_count','-','CT','GA',1,1]  # CT converted read vs. GA converted genome, from (converted) reverse strand
+    : $index == 2 ? ['GA_CT_count','-','GA','CT',0,1]  # GA converted read vs. CT converted genome, complementary to (converted) forward strand
+    : $index == 3 ? ['GA_GA_count','+','GA','GA',1,0]  # GA converted read vs. GA converted genome, complementary to (converted) reverse strand
+    :               die "Too many bowtie result filehandles\n";
+  my ($counter,$alignment_strand,$read_conversion_info,$genome_conversion,$extra_bases_in_front,$reverse_complement_needed) = @{$strand_settings};
+  $counting{$counter}++;
+  my $chromosome  = $read->{chromosome};
+  my $position    = $read->{position};
+  my $read_length = length($read->{bowtie_sequence});
+  ### the 2 extra bases are taken either in front of the read or behind it (on the forward strand); for reverse strand alignments
+  ### the reverse complementation further down swaps the end they finally appear on
+  my $window_start = $extra_bases_in_front ? $position-2 : $position;
+  ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+  my $window_is_valid = $extra_bases_in_front
+    ? ($position-2 >= 0)
+    : (length($chromosomes{$chromosome}) > $position+$read_length+1);
+  my $non_bisulfite_sequence = '';
+  if ($window_is_valid){
+    $non_bisulfite_sequence = substr ($chromosomes{$chromosome},$window_start,$read_length+2);
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence) if ($reverse_complement_needed);
+  }
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  $read->{alignment_strand} = $alignment_strand;
+  $read->{read_conversion} = $read_conversion_info;
+  $read->{genome_conversion} = $genome_conversion;
+  $read->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+  ### at this point we can also determine the end position of a read
+  $read->{end_position} = $position+$read_length;
+}
+### EXTRACT GENOMIC SEQUENCE BOWTIE 1 | SINGLE-END | PBAT
+### Same job as the standard single-end extraction, but the strand is selected by (index + 2): it looks up the unconverted genomic
+### sequence of the alignment (+2 extra bases, so that a C at the last position of the read can still be placed in CpG, CHG or CHH
+### context) and gives the read a 'memory' of its strand and conversion.
+### Arguments: $sequence_identifier (key of the read in $methylation_call_params) and $methylation_call_params (hash reference);
+###            the entry of the read supplies index, chromosome, position and bowtie_sequence
+### Stored in the entry of the read: alignment_strand, read_conversion, genome_conversion, unmodified_genomic_sequence
+###            (empty string if the 2 extra bases would lie beyond the chromosome) and end_position
+### Dies if (index + 2) is not 0, 1, 2 or 3
+sub extract_corresponding_genomic_sequence_single_end_pbat {
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  ### taking the reference this way creates the entry of the read if it is missing, just like nested access would
+  my $read = \%{ $methylation_call_params->{$sequence_identifier} };
+  my $pbat_index = $read->{index} + 2; # (we are simply not running indexes 0 or 1!
+  ### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands
+  ### columns: counter, alignment strand, read conversion, genome conversion, extra bases in front of the read?, reverse complement?
+  my $strand_settings =
+      $pbat_index == 0 ? ['CT_CT_count','+','CT','CT',0,0]  # CT converted read vs. CT converted genome, from (converted) forward strand
+    : $pbat_index == 1 ? ['CT_GA_count','-','CT','GA',1,1]  # CT converted read vs. GA converted genome, from (converted) reverse strand
+    : $pbat_index == 2 ? ['GA_CT_count','-','GA','CT',0,1]  # GA converted read vs. CT converted genome, complementary to (converted) forward strand
+    : $pbat_index == 3 ? ['GA_GA_count','+','GA','GA',1,0]  # GA converted read vs. GA converted genome, complementary to (converted) reverse strand
+    :                    die "Too many bowtie result filehandles\n";
+  my ($counter,$alignment_strand,$read_conversion_info,$genome_conversion,$extra_bases_in_front,$reverse_complement_needed) = @{$strand_settings};
+  $counting{$counter}++;
+  my $chromosome  = $read->{chromosome};
+  my $position    = $read->{position};
+  my $read_length = length($read->{bowtie_sequence});
+  ### the 2 extra bases are taken either in front of the read or behind it (on the forward strand); for reverse strand alignments
+  ### the reverse complementation further down swaps the end they finally appear on
+  my $window_start = $extra_bases_in_front ? $position-2 : $position;
+  ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+  my $window_is_valid = $extra_bases_in_front
+    ? ($position-2 >= 0)
+    : (length($chromosomes{$chromosome}) > $position+$read_length+1);
+  my $non_bisulfite_sequence = '';
+  if ($window_is_valid){
+    $non_bisulfite_sequence = substr ($chromosomes{$chromosome},$window_start,$read_length+2);
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence) if ($reverse_complement_needed);
+  }
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  $read->{alignment_strand} = $alignment_strand;
+  $read->{read_conversion} = $read_conversion_info;
+  $read->{genome_conversion} = $genome_conversion;
+  $read->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+  ### at this point we can also determine the end position of a read
+  $read->{end_position} = $position+$read_length;
+}
+### Extracts the unconverted genomic sequence underlying a single-end Bowtie 2 alignment and stores it, together
+### with the strand and conversion information, in the read's entry of $methylation_call_params.
+###
+### Arguments:
+###   $sequence_identifier     - key of the read in $methylation_call_params
+###   $methylation_call_params - hash ref of per-read hashes; this sub reads {CIGAR}, {position} (1-based, as in SAM),
+###                              {chromosome} and {index} (0 = OT, 1 = OB, 2 = CTOT, 3 = CTOB)
+###
+### Fields written to the read's entry on success:
+###   unmodified_genomic_sequence, alignment_strand, read_conversion, genome_conversion, end_position, indels
+###
+### The genomic sequence gets 2 extra bases (downstream for index 0/2, upstream for index 1/3; for the '-' strand
+### alignments 1 and 2 the sequence is reverse complemented afterwards) so that the sequence context of a C at the
+### very end of the read can still be determined. If these 2 bases would lie beyond the edge of the chromosome, only
+### {unmodified_genomic_sequence} is set (to an empty string) and the sub returns early.
+###
+### Dies if the CIGAR string is inconsistent or contains operations other than M, I and D, or if {index} is not 0-3.
+### Uses the file-level %chromosomes and %counting hashes and reverse_complement().
+sub extract_corresponding_genomic_sequence_single_end_bowtie2{
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  my $read  = $methylation_call_params->{$sequence_identifier} ||= {};
+  my $cigar = $read->{CIGAR};
+  my $index = $read->{index};
+  my $chr   = $read->{chromosome};
+  ### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands. We are also giving the
+  ### sequence a 'memory' of the conversion we are expecting which we will need later for the methylation call:
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  my $alignment_strand;
+  my $read_conversion_info;
+  my $genome_conversion;
+  my $non_bisulfite_sequence = '';
+  ### Positions in SAM format are 1 based, so we need to subtract 1 when getting substrings
+  my $pos = $read->{position}-1;
+  # parsing CIGAR string
+  my @len = split (/\D+/,$cigar); # storing the length per operation
+  my @ops = split (/\d+/,$cigar); # storing the operation
+  shift @ops; # remove the empty first element
+  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+  ### If the sequence aligns best as CT converted reads vs. GA converted genome (OB, index 1) or GA converted reads vs. GA converted genome (CTOB, index 3)
+  ### the 2 extra bases are taken from upstream of the alignment
+  if ( ($index == 1) or ($index == 3) ){
+    ## at the very start of a chromosome the 2 upstream bases do not exist: exit with an empty genomic sequence
+    unless ( ($pos-2) >= 0){
+      $read->{unmodified_genomic_sequence} = '';
+      return;
+    }
+    $non_bisulfite_sequence .= substr ($chromosomes{$chr},$pos-2,2);
+  }
+  my $indels = 0; # total length of insertions and deletions, needed for the edit distance in the SAM output
+  foreach my $op_number (0..$#len){
+    my $operation = $ops[$op_number];
+    my $length    = $len[$op_number];
+    if ($operation eq 'M'){
+      # extracting genomic sequence and adjusting the position
+      $non_bisulfite_sequence .= substr ($chromosomes{$chr},$pos,$length);
+      $pos += $length;
+    }
+    elsif ($operation eq 'I'){ # insertion in the read sequence
+      # we simply add padding Ns instead of finding genomic sequence. This will not be used to infer methylation calls.
+      # The genomic position doesn't need to be adjusted
+      $non_bisulfite_sequence .= 'N' x $length;
+      $indels += $length;
+    }
+    elsif ($operation eq 'D'){ # deletion in the read sequence
+      # we do not add any genomic sequence but only adjust the position
+      $pos += $length;
+      $indels += $length;
+    }
+    elsif ($cigar =~ tr/NSHPX=//){ # tr/// takes a plain character list; these operations are illegal for standard mapping
+      die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+    else{
+      die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+    }
+  }
+  ### If the sequence aligns best as CT converted reads vs. CT converted genome (OT, index 0) or GA converted reads vs. CT converted genome (CTOT, index 2)
+  ### the 2 extra bases are taken from downstream of the alignment
+  if ( ($index == 0) or ($index == 2) ){
+    ## at the very end of a chromosome the 2 downstream bases do not exist: exit with an empty genomic sequence
+    ## (and not with the partial sequence collected so far) so that this case is reported like the upstream one
+    unless (length($chromosomes{$chr}) >= $pos+2){
+      $read->{unmodified_genomic_sequence} = '';
+      return;
+    }
+    $non_bisulfite_sequence .= substr ($chromosomes{$chr},$pos,2);
+  }
+  ### results from CT converted read vs. CT converted genome (+ orientation alignments are reported only)
+  if ($index == 0){
+    ### [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_CT_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'CT';
+  }
+  ### results from CT converted reads vs. GA converted genome (- orientation alignments are reported only)
+  elsif ($index == 1){
+    ### [Index 1, sequence originated from (converted) reverse strand]
+    $counting{CT_GA_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'GA';
+    ### reverse complement!
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+  }
+  ### results from GA converted reads vs. CT converted genome (- orientation alignments are reported only)
+  elsif ($index == 2){
+    ### [Index 2, sequence originated from complementary to (converted) forward strand]
+    $counting{GA_CT_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'CT';
+    ### reverse complement!
+    $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+  }
+  ### results from GA converted reads vs. GA converted genome (+ orientation alignments are reported only)
+  elsif ($index == 3){
+    ### [Index 3, sequence originated from complementary to (converted) reverse strand]
+    $counting{GA_GA_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'GA';
+  }
+  else{
+    die "Too many Bowtie 2 result filehandles\n";
+  }
+  $read->{alignment_strand} = $alignment_strand;
+  $read->{read_conversion} = $read_conversion_info;
+  $read->{genome_conversion} = $genome_conversion;
+  $read->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+  ### the end position of a read is stored in $pos
+  $read->{end_position} = $pos;
+  $read->{indels} = $indels;
+}
+### METHYLATION CALL
+### Compares a read with the corresponding genomic sequence base by base and returns the methylation call string
+### (one character per read base). The per-context totals of the read are added to the file-level %counting hash.
+###
+### Arguments:
+###   $identifier                 - read ID (not used for the call itself)
+###   $sequence_actually_observed - the read sequence
+###   $genomic_sequence           - genomic sequence, expected to be 2 bases longer than the read (a warning is
+###                                 printed otherwise): for 'CT' the 2 extra bases follow the read, for 'GA' they precede it
+###   $read_conversion            - 'CT' (Cs of the read strand are assessed) or 'GA' (Gs, i.e. Cs on the opposing strand)
+###
+### Characters of the methylation call string:
+#################################################################
+### . for bases not involving cytosines                       ###
+### X for methylated C in CHG context (was protected)         ###
+### x for not methylated C in CHG context (was converted)     ###
+### H for methylated C in CHH context (was protected)         ###
+### h for not methylated C in CHH context (was converted)     ###
+### Z for methylated C in CpG context (was protected)         ###
+### z for not methylated C in CpG context (was converted)     ###
+#################################################################
+### Dies if $read_conversion is neither 'CT' nor 'GA'.
+sub methylation_call{
+  my ($identifier,$sequence_actually_observed,$genomic_sequence,$read_conversion) = @_;
+  ### splitting both the actually observed sequence and the genomic sequence up into single bases so we can compare them one by one
+  my @seq = split(//,$sequence_actually_observed);
+  my @genomic = split(//,$genomic_sequence);
+  warn "length of \@seq: ",scalar @seq,"\tlength of \@genomic: ",scalar @genomic,"\n" unless (scalar @seq == (scalar @genomic - 2));
+  ### Both conversion types follow the same logic and only differ in the bases and offsets involved:
+  ###   $protected    - genomic base that is assessed (and read base if it was protected by methylation)
+  ###   $converted    - read base seen if the cytosine was converted, i.e. not methylated
+  ###   $partner      - neighbouring genomic base which defines CpG (direct neighbour) or CHG (second neighbour) context
+  ###   $base_offset  - offset of the genomic base corresponding to read position $index
+  ###   $far_offset   - offset of the second neighbour; the direct neighbour is at offset 1 in both cases
+  my ($protected,$converted,$partner,$base_offset,$far_offset);
+  if ($read_conversion eq 'CT'){
+    ($protected,$converted,$partner,$base_offset,$far_offset) = ('C','T','G',0,2); # context is read downstream
+  }
+  elsif ($read_conversion eq 'GA'){
+    ($protected,$converted,$partner,$base_offset,$far_offset) = ('G','A','C',2,0); # context is read upstream
+  }
+  else{
+    die "Strand conversion info is required to perform a methylation call\n";
+  }
+  my %calls_made = map { $_ => 0 } qw(Z z X x H h);
+  my @match = ();
+  for my $index (0..$#seq) {
+    my $observed_base = $seq[$index];
+    my $genomic_base  = $genomic[$index+$base_offset];
+    ### only positions with a genomic cytosine (or G for GA reads) which is either unchanged or converted in the
+    ### read are of interest for a methylation call; all other matches or mismatches are reported as '.'
+    my $call = '.';
+    if ($genomic_base eq $protected and ($observed_base eq $protected or $observed_base eq $converted)){
+      if ($genomic[$index+1] eq $partner){
+        $call = 'Z'; # CpG context
+      }
+      elsif ($genomic[$index+$far_offset] eq $partner){
+        $call = 'X'; # CHG context
+      }
+      else{
+        $call = 'H'; # CHH context
+      }
+      ### lower case letters denote converted (= not methylated) cytosines
+      $call = lc $call if ($observed_base eq $converted);
+      ++$calls_made{$call};
+    }
+    push @match,$call;
+  }
+  my $methylation_call = join ("",@match);
+  $counting{total_meCHH_count} += $calls_made{H};
+  $counting{total_meCHG_count} += $calls_made{X};
+  $counting{total_meCpG_count} += $calls_made{Z};
+  $counting{total_unmethylated_CHH_count} += $calls_made{h};
+  $counting{total_unmethylated_CHG_count} += $calls_made{x};
+  $counting{total_unmethylated_CpG_count} += $calls_made{z};
+  return $methylation_call;
+}
+### Reads all FastA files in $genome_folder into the file-level %chromosomes hash (chromosome name => upper-cased
+### sequence). Files with the extension .fa are used if present, otherwise files with the extension .fasta.
+### Multi-FastA files are supported; chromosome names are determined by extract_chromosome_name() and have to be
+### unique across all files. The name and length of every chromosome is printed to STDOUT.
+###
+### Argument: $cwd - directory to change back to once the genome has been read
+### Dies if $genome_folder or $cwd cannot be entered, if no sequence files are found, if a file cannot be opened
+### or does not start with a FastA header, or if a chromosome name occurs more than once.
+### Warns about chromosomes without any sequence.
+sub read_genome_into_memory{
+  ## working directory
+  my $cwd = shift;
+  ## reading in and storing the specified genome in the %chromosomes hash
+  chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
+  print "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";
+  my @chromosome_filenames = <*.fa>;
+  ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta
+  @chromosome_filenames = <*.fasta> unless (@chromosome_filenames);
+  unless (@chromosome_filenames){
+    die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions)\n";
+  }
+  foreach my $chromosome_filename (@chromosome_filenames){
+    open (my $chr_in,'<',$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
+    ### first line needs to be a fastA header
+    my $first_line = <$chr_in>;
+    $first_line = '' unless (defined $first_line); # empty file, will be rejected by extract_chromosome_name()
+    chomp $first_line;
+    $first_line =~ s/\r//;
+    ### Extracting chromosome name from the FastA header
+    my $chromosome_name = extract_chromosome_name($first_line);
+    ### starting with an empty string (and not undef) so that a header without any sequence is handled cleanly
+    my $sequence = '';
+    while (my $line = <$chr_in>){
+      chomp $line;
+      $line =~ s/\r//;
+      if ($line =~ /^>/){
+        ### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
+        if (exists $chromosomes{$chromosome_name}){
+          print "chr $chromosome_name (",length $sequence ," bp)\n";
+          die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
+        }
+        if (length($sequence) == 0){
+          warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
+        }
+        print "chr $chromosome_name (",length $sequence ," bp)\n";
+        $chromosomes{$chromosome_name} = $sequence;
+        ### resetting the sequence variable
+        $sequence = '';
+        ### setting new chromosome name
+        $chromosome_name = extract_chromosome_name($line);
+      }
+      else{
+        $sequence .= uc $line;
+      }
+    }
+    close ($chr_in);
+    ### storing the last (or only) chromosome of the file
+    if (exists $chromosomes{$chromosome_name}){
+      print "chr $chromosome_name (",length $sequence ," bp)\t";
+      die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
+    }
+    if (length($sequence) == 0){
+      warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
+    }
+    print "chr $chromosome_name (",length $sequence ," bp)\n";
+    $chromosomes{$chromosome_name} = $sequence;
+  }
+  print "\n";
+  chdir $cwd or die "Failed to move to directory $cwd\n";
+}
+### Returns the chromosome name of a FastA header line, i.e. the first whitespace-delimited string following the
+### initial '>' (Bowtie seems to name its reference sequences the same way).
+### Dies if the line does not start with '>'.
+sub extract_chromosome_name {
+  my ($header_line) = @_;
+  unless ($header_line =~ /^>/){
+    die "The specified chromosome ($header_line) file doesn't seem to be in FASTA format as required!\n";
+  }
+  ## everything after the leading '>', of which only the first field is kept
+  my $description = substr ($header_line,1);
+  my ($first_field) = split (/\s+/,$description);
+  return $first_field;
+}
+### Returns the reverse complement of a DNA sequence. Only the upper case bases A, C, G and T are complemented,
+### any other character (e.g. N) is kept as it is.
+sub reverse_complement{
+  my ($forward_strand) = @_;
+  (my $opposing_strand = $forward_strand) =~ tr/CATG/GTAC/;
+  return scalar reverse ($opposing_strand);
+}
+### Writes bisulfite converted copies of a single-end FastA file to $temp_dir: a C -> T converted version and,
+### unless the library is directional, also a G -> A converted version. Read IDs are passed through fix_IDs(),
+### sequences are upper-cased, and the file-level options $skip and $upto restrict which reads are written.
+### Input files ending in .gz are read through zcat; the output is piped through gzip if $gzip is set.
+###
+### Argument: $file - FastA file with one header line followed by one sequence line per read
+### Returns:  ($C_to_T_infile,$G_to_A_infile) - names of the converted files (without $temp_dir); the G -> A name
+###           is returned even in directional mode where this file is not written
+### Dies if a file cannot be opened, if an output file cannot be closed, or if a header does not start with '>'.
+sub biTransformFastAFiles {
+  my $file = shift;
+  ## name of the input file without leading directories
+  my $filename = $file;
+  if ($file =~ /\//){
+    (undef,$filename) = $file =~ m/(.*\/)(.*)$/;
+  }
+  ## The input is opened with the 3-argument/list form of open so that the file name is never interpreted by a
+  ## shell or as an open mode (file names containing spaces or shell metacharacters are read correctly)
+  my $in;
+  if ($file =~ /\.gz$/){ # gzipped version of the infile
+    open ($in,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open ($in,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  if ($gzip){
+    $C_to_T_infile =~ s/$/_C_to_T.fa.gz/;
+    $G_to_A_infile =~ s/$/_G_to_A.fa.gz/;
+  }
+  else{
+    $C_to_T_infile =~ s/$/_C_to_T.fa/;
+    $G_to_A_infile =~ s/$/_G_to_A.fa/;
+  }
+  ## opens a (gzip compressed if requested) output file in $temp_dir and returns its filehandle
+  my $open_output = sub {
+    my ($outfile) = @_;
+    my $fh;
+    if ($gzip){
+      open ($fh,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open ($fh,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
+    }
+    return $fh;
+  };
+  warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+  my $ctot = $open_output->($C_to_T_infile);
+  my $gtoa;
+  unless ($directional){
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    $gtoa = $open_output->($G_to_A_infile);
+  }
+  my $count = 0;
+  while (1){
+    my $header = <$in>;
+    my $sequence = <$in>;
+    last unless ($header and $sequence);
+    $header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
+    ++$count;
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+    $sequence = uc $sequence; # make input file case insensitive
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    if (index($header,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+    ### small check if the sequence seems to be in FastA format
+    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>.*/);
+    my $sequence_C_to_T = $sequence;
+    $sequence_C_to_T =~ tr/C/T/;
+    print {$ctot} "$header$sequence_C_to_T";
+    unless ($directional){
+      my $sequence_G_to_A = $sequence;
+      $sequence_G_to_A =~ tr/G/A/;
+      print {$gtoa} "$header$sequence_G_to_A";
+    }
+  }
+  ## the status of the input is deliberately not checked: zcat gets terminated early if --upto stopped the reading
+  close ($in);
+  close ($ctot) or die "Failed to close filehandle $!\n";
+  if ($directional){
+    warn "\nCreated C -> T converted versions of the FastA file $filename ($count sequences in total)\n\n";
+  }
+  else{
+    close ($gtoa) or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+  }
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+### Writes bisulfite converted copies of one file of a paired-end FastA data set to $temp_dir (always uncompressed,
+### $gzip only triggers a warning). In directional mode read 1 is written C -> T converted and read 2 G -> A
+### converted; otherwise both versions are written for the file. Read IDs are passed through fix_IDs() and get
+### /1 or /2 appended (/1/1 or /2/2 for Bowtie 2), sequences are upper-cased, and the file-level options $skip and
+### $upto restrict which reads are written. Input files ending in .gz are read through zcat.
+###
+### Arguments:
+###   $file        - FastA file with one header line followed by one sequence line per read
+###   $read_number - 1 or 2
+### Returns the names of the converted files (without $temp_dir): ($C_to_T_infile) for read 1 and ($G_to_A_infile)
+### for read 2 in directional mode, ($C_to_T_infile,$G_to_A_infile) otherwise.
+### Dies if a file cannot be opened or closed, if a header does not start with '>', or if $read_number is not 1 or 2.
+sub biTransformFastAFiles_paired_end {
+  my ($file,$read_number) = @_;
+  if ($gzip){
+    warn "GZIP compression of temporary files is not supported for paired-end FastA data. Continuing to write uncompressed files\n";
+    sleep (2);
+  }
+  ## name of the input file without leading directories
+  my $filename = $file;
+  if ($file =~ /\//){
+    (undef,$filename) = $file =~ m/(.*\/)(.*)$/;
+  }
+  ## The input is opened with the 3-argument/list form of open so that the file name is never interpreted by a
+  ## shell or as an open mode (file names containing spaces or shell metacharacters are read correctly)
+  my $in;
+  if ($file =~ /\.gz$/){ # gzipped version of the infile
+    open ($in,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open ($in,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  $C_to_T_infile =~ s/$/_C_to_T.fa/;
+  $G_to_A_infile =~ s/$/_G_to_A.fa/;
+  my ($ctot,$gtoa); # output filehandles, only the ones needed get opened
+  if ($directional){
+    if ($read_number == 1){
+      warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      open ($ctot,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    }
+    elsif ($read_number == 2){
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      open ($gtoa,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was: $read_number\n\n";
+    }
+  }
+  else{ # all four strand output
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    open ($ctot,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+    open ($gtoa,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  }
+  my $count = 0;
+  while (1){
+    my $header = <$in>;
+    my $sequence = <$in>;
+    last unless ($header and $sequence);
+    $header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
+    ++$count;
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+    $sequence = uc $sequence; # make input file case insensitive
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    if (index($header,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+    ## small check if the sequence seems to be in FastA format
+    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>/);
+    ## appending the read number to the ID (in front of the trailing line break)
+    if ($read_number == 1){
+      if ($bowtie2){
+        $header =~ s/$/\/1\/1/;
+      }
+      else{
+        $header =~ s/$/\/1/;
+      }
+    }
+    elsif ($read_number == 2){
+      if ($bowtie2){
+        $header =~ s/$/\/2\/2/;
+      }
+      else{
+        $header =~ s/$/\/2/;
+      }
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was: $read_number\n\n";
+    }
+    my $sequence_C_to_T = my $sequence_G_to_A = $sequence;
+    $sequence_C_to_T =~ tr/C/T/;
+    $sequence_G_to_A =~ tr/G/A/;
+    if ($directional){
+      if ($read_number == 1){
+        print {$ctot} "$header$sequence_C_to_T";
+      }
+      elsif ($read_number == 2){
+        print {$gtoa} "$header$sequence_G_to_A";
+      }
+    }
+    else{
+      print {$ctot} "$header$sequence_C_to_T";
+      print {$gtoa} "$header$sequence_G_to_A";
+    }
+  }
+  ## the status of the input is deliberately not checked: zcat gets terminated early if --upto stopped the reading
+  close ($in);
+  ## The output files are closed explicitly so that write errors (e.g. a full disk) are detected and all data is
+  ## on disk before the files are used any further
+  if ($directional){
+    if ($read_number == 1){
+      close ($ctot) or die "Failed to close filehandle $!\n";
+      warn "\nCreated C -> T converted version of the FastA file $filename ($count sequences in total)\n\n";
+      return ($C_to_T_infile);
+    }
+    close ($gtoa) or die "Failed to close filehandle $!\n";
+    warn "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
+    return ($G_to_A_infile);
+  }
+  close ($ctot) or die "Failed to close filehandle $!\n";
+  close ($gtoa) or die "Failed to close filehandle $!\n";
+  warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+### Writes bisulfite converted copies of a single-end FastQ file to $temp_dir:
+###   - PBAT libraries ($pbat):  only a G -> A converted version
+###   - directional libraries:   only a C -> T converted version
+###   - otherwise:               a C -> T as well as a G -> A converted version
+### Read IDs are passed through fix_IDs(), sequences are upper-cased, and the file-level options $skip and $upto
+### restrict which reads are written. Input files ending in .gz are read through zcat; the output is piped through
+### gzip if $gzip is set.
+###
+### Argument: $file - FastQ file with 4 lines per read
+### Returns the names of the converted files (without $temp_dir): ($G_to_A_infile) for PBAT libraries, otherwise
+### ($C_to_T_infile,$G_to_A_infile); in directional mode the G -> A entry is the unchanged input file name as
+### this file is not written.
+### Dies if a file cannot be opened, if an output file cannot be closed, or if the first read processed as read
+### number 1 does not look like a FastQ entry.
+sub biTransformFastQFiles {
+  my $file = shift;
+  ## name of the input file without leading directories
+  my $filename = $file;
+  if ($file =~ /\//){
+    (undef,$filename) = $file =~ m/(.*\/)(.*)$/;
+  }
+  ## The input is opened with the 3-argument/list form of open so that the file name is never interpreted by a
+  ## shell or as an open mode (file names containing spaces or shell metacharacters are read correctly)
+  my $in;
+  if ($file =~ /\.gz$/){ # gzipped version of the infile
+    open ($in,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open ($in,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+  ## opens a (gzip compressed if requested) output file in $temp_dir and returns its filehandle
+  my $open_output = sub {
+    my ($outfile) = @_;
+    my $fh;
+    if ($gzip){
+      open ($fh,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open ($fh,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
+    }
+    return $fh;
+  };
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  my ($ctot,$gtoa); # output filehandles, only the ones needed get opened
+  if ($pbat){ # PBAT-Seq
+    if ($gzip){
+      $G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+    }
+    else{
+      $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+    }
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    $gtoa = $open_output->($G_to_A_infile);
+  }
+  else{ # directional or non-directional
+    if ($gzip){
+      $C_to_T_infile =~ s/$/_C_to_T.fastq.gz/;
+    }
+    else{
+      $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+    }
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    $ctot = $open_output->($C_to_T_infile);
+    unless ($directional){
+      if ($gzip){
+        $G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+      }
+      else{
+        $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+      }
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      $gtoa = $open_output->($G_to_A_infile);
+    }
+  }
+  my $count = 0;
+  while (1){
+    my $identifier = <$in>;
+    my $sequence = <$in>;
+    my $identifier2 = <$in>;
+    my $quality_score = <$in>;
+    last unless ($identifier and $sequence and $identifier2 and $quality_score);
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+    ++$count;
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+    $sequence = uc $sequence; # make input file case insensitive
+    # detecting if the input file contains tab stops, as this is likely to result in no alignments
+    if (index($identifier,"\t") != -1){
+      $seqID_contains_tabs++;
+    }
+    ## small check if the sequence file appears to be a FastQ file
+    if ($count == 1){
+      if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+        die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
+    }
+    if ($pbat){
+      my $sequence_G_to_A = $sequence;
+      $sequence_G_to_A =~ tr/G/A/;
+      print {$gtoa} join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+    }
+    else{ # directional or non-directional
+      my $sequence_C_to_T = $sequence;
+      $sequence_C_to_T =~ tr/C/T/;
+      print {$ctot} join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+      unless ($directional){
+        my $sequence_G_to_A = $sequence;
+        $sequence_G_to_A =~ tr/G/A/;
+        print {$gtoa} join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+      }
+    }
+  }
+  ## the status of the input is deliberately not checked: zcat gets terminated early if --upto stopped the reading
+  close ($in);
+  if ($directional){
+    close ($ctot) or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
+  }
+  elsif ($pbat){
+    warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
+    close ($gtoa) or die "Failed to close filehandle $!\n";
+    return ($G_to_A_infile);
+  }
+  else{
+    close ($ctot) or die "Failed to close filehandle $!\n";
+    close ($gtoa) or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+  }
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+### Writes bisulfite converted copies of one file of a paired-end FastQ data set to $temp_dir. In directional mode
+### read 1 is written C -> T converted and read 2 G -> A converted; otherwise both versions are written for the
+### file. Read IDs are passed through fix_IDs() and get /1 or /2 appended (/1/1 or /2/2 for Bowtie 2), sequences
+### are upper-cased, and the file-level options $skip and $upto restrict which reads are written. Input files
+### ending in .gz are read through zcat; the output is piped through gzip if $gzip is set.
+###
+### Arguments:
+###   $file        - FastQ file with 4 lines per read
+###   $read_number - 1 or 2
+### Returns the names of the converted files (without $temp_dir): ($C_to_T_infile) for read 1 and ($G_to_A_infile)
+### for read 2 in directional mode, ($C_to_T_infile,$G_to_A_infile) otherwise.
+### Dies if a file cannot be opened, if an output file cannot be closed, if the first read processed as read
+### number 1 does not look like a FastQ entry, or if $read_number is not 1 or 2.
+sub biTransformFastQFiles_paired_end {
+  my ($file,$read_number) = @_;
+  ## name of the input file without leading directories
+  my $filename = $file;
+  if ($file =~ /\//){
+    (undef,$filename) = $file =~ m/(.*\/)(.*)$/;
+  }
+  ## The input is opened with the 3-argument/list form of open so that the file name is never interpreted by a
+  ## shell or as an open mode (file names containing spaces or shell metacharacters are read correctly)
+  my $in;
+  if ($file =~ /\.gz$/){ # gzipped version of the infile
+    open ($in,'-|','zcat',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  else{
+    open ($in,'<',$file) or die "Couldn't read from file $file: $!\n";
+  }
+  if ($skip){
+    warn "Skipping the first $skip reads from $file\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file\n";
+    sleep (1);
+  }
+  my $C_to_T_infile = my $G_to_A_infile = $filename;
+  if ($gzip){
+    $C_to_T_infile =~ s/$/_C_to_T.fastq.gz/;
+    $G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+  }
+  else{
+    $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+    $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+  }
+  ## opens a (gzip compressed if requested) output file in $temp_dir and returns its filehandle
+  my $open_output = sub {
+    my ($outfile) = @_;
+    my $fh;
+    if ($gzip){
+      open ($fh,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open ($fh,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
+    }
+    return $fh;
+  };
+  my ($ctot,$gtoa); # output filehandles, only the ones needed get opened
+  if ($directional){
+    if ($read_number == 1){
+      warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      $ctot = $open_output->($C_to_T_infile);
+    }
+    elsif ($read_number == 2){
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      $gtoa = $open_output->($G_to_A_infile);
+    }
+    else{
+      die "Read number needs to be 1 or 2, but was $read_number!\n\n";
+    }
+  }
+  else{
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    $ctot = $open_output->($C_to_T_infile);
+    $gtoa = $open_output->($G_to_A_infile);
+  }
+  my $count = 0;
+  while (1){
+    my $identifier = <$in>;
+    my $sequence = <$in>;
+    my $identifier2 = <$in>;
+    my $quality_score = <$in>;
+    last unless ($identifier and $sequence and $identifier2 and $quality_score);
+    ++$count;
+    $identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+    $sequence = uc $sequence; # make input file case insensitive
+    ## small check if the sequence file appears to be a FastQ file
+    if ($count == 1){
+      if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+        die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
+    }
+    my $sequence_C_to_T = my $sequence_G_to_A = $sequence;
+    ## appending the read number to the ID (in front of the trailing line break)
+    if ($read_number == 1){
+      if ($bowtie2){
+        $identifier =~ s/$/\/1\/1/;
+      }
+      else{
+        $identifier =~ s/$/\/1/;
+      }
+    }
+    elsif ($read_number == 2){
+      if ($bowtie2){
+        $identifier =~ s/$/\/2\/2/;
+      }
+      else{
+        $identifier =~ s/$/\/2/;
+      }
+    }
+    else{
+      die "Read number needs to be 1 or 2\n";
+    }
+    $sequence_C_to_T =~ tr/C/T/;
+    $sequence_G_to_A =~ tr/G/A/;
+    if ($directional){
+      if ($read_number == 1){
+        print {$ctot} join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+      }
+      else{
+        print {$gtoa} join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+      }
+    }
+    else{
+      print {$ctot} join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+      print {$gtoa} join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+    }
+  }
+  ## the status of the input is deliberately not checked: zcat gets terminated early if --upto stopped the reading
+  close ($in);
+  if ($directional){
+    if ($read_number == 1){
+      warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
+      close ($ctot) or die "Failed to close filehandle $!\n";
+      return ($C_to_T_infile);
+    }
+    warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
+    close ($gtoa) or die "Failed to close filehandle $!\n";
+    return ($G_to_A_infile);
+  }
+  warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+  close ($ctot) or die "Failed to close filehandle $!\n";
+  close ($gtoa) or die "Failed to close filehandle $!\n";
+  return ($C_to_T_infile,$G_to_A_infile);
+}
+### SPECIAL BOWTIE 1 PAIRED-END FORMAT FOR GZIPPED OUTPUT FILES
+sub biTransformFastQFiles_paired_end_bowtie1_gzip {
+    ### Writes gzipped, bisulfite-converted paired-end input for Bowtie 1.
+    ###
+    ### Arguments: $file_1, $file_2 - FastQ files of read 1 and read 2 (plain text, or gzipped if the name ends in .gz).
+    ### Both files are read in lock-step and every read pair is written as a single tab-delimited line:
+    ###   <seq-ID>   <sequence #1 mate>   <quality #1 mate>   <sequence #2 mate>   <quality #2 mate>
+    ### The C->T version of read 1 plus the G->A version of read 2 goes to <name of file 1>.CT_plus_GA.fastq.gz;
+    ### unless $directional is set, the reciprocal conversion additionally goes to <name of file 1>.GA_plus_CT.fastq.gz.
+    ### Both files are created in $temp_dir. The file-level settings $skip and $upto limit which pairs are written.
+    ###
+    ### Returns the output file name(s) without the $temp_dir prefix (1 name if $directional, otherwise 2).
+    ### Dies if a file can't be opened or closed, or if the first record of either input doesn't look like FastQ.
+    my ($file_1,$file_2) = @_;
+    ### the output file names are derived from the name of file 1 without any leading path
+    my $filename = $file_1;
+    if ($file_1 =~ /\//){
+        ($filename) = $file_1 =~ m/.*\/(.*)$/;
+    }
+    ### gzipped version of infile 1
+    if ($file_1 =~ /\.gz$/){
+        open (IN_1,"zcat $file_1 |") or die "Couldn't read from file $file_1: $!\n";
+    }
+    else{
+        ### 3-argument open so that characters such as '>' or '|' in the file name are never interpreted as an open mode
+        open (IN_1,'<',$file_1) or die "Couldn't read from file $file_1: $!\n";
+    }
+    ### gzipped version of infile 2
+    if ($file_2 =~ /\.gz$/){
+        open (IN_2,"zcat $file_2 |") or die "Couldn't read from file $file_2: $!\n";
+    }
+    else{
+        open (IN_2,'<',$file_2) or die "Couldn't read from file $file_2: $!\n";
+    }
+    if ($skip){
+        warn "Skipping the first $skip reads from $file_1 and $file_2\n";
+        sleep (1);
+    }
+    if ($upto){
+        warn "Processing reads up to sequence no. $upto from $file_1 and $file_2\n";
+        sleep (1);
+    }
+    my $CT_plus_GA_infile = my $GA_plus_CT_infile = $filename;
+    $CT_plus_GA_infile =~ s/$/.CT_plus_GA.fastq.gz/;
+    $GA_plus_CT_infile =~ s/$/.GA_plus_CT.fastq.gz/;
+    warn "Writing a C -> T converted version of $file_1 and a G -> A converted version of $file_2 to $temp_dir$CT_plus_GA_infile\n";
+    open (CTPLUSGA,"| gzip -c - > ${temp_dir}${CT_plus_GA_infile}") or die "Can't write to file: $!\n";
+    # open (CTPLUSGA,'>',"$temp_dir$CT_plus_GA_infile") or die "Couldn't write to file $!\n";
+    unless ($directional){
+        ### progress messages go to STDERR (warn), just like the message for the first output file above
+        warn "Writing a G -> A converted version of $file_1 and a C -> T converted version of $file_2 to $temp_dir$GA_plus_CT_infile\n";
+        open (GAPLUSCT,"| gzip -c - > ${temp_dir}${GA_plus_CT_infile}") or die "Can't write to file: $!\n";
+    }
+    ### for Bowtie 1 we need to write a single gzipped file with 1 line per pair of sequences in the following format:
+    ### <seq-ID>     <sequence #1 mate>     <quality #1 mate>     <sequence #2 mate>     <quality #2 mate>
+    my $count = 0;
+    while (1){
+        my $identifier_1 = <IN_1>;
+        my $sequence_1 = <IN_1>;
+        my $identifier2_1 = <IN_1>;
+        my $quality_score_1 = <IN_1>;
+        my $identifier_2 = <IN_2>;
+        my $sequence_2 = <IN_2>;
+        my $identifier2_2 = <IN_2>;
+        my $quality_score_2 = <IN_2>;
+        ### stop as soon as either file fails to deliver a complete 4-line record
+        last unless ($identifier_1 and $sequence_1 and $identifier2_1 and $quality_score_1 and $identifier_2 and $sequence_2 and $identifier2_2 and $quality_score_2);
+        ++$count;
+        ## small check if the sequence file appears to be a FastQ file
+        if ($count == 1){
+            if ($identifier_1 !~ /^\@/ or $identifier2_1 !~ /^\+/){
+                die "Input file 1 doesn't seem to be in FastQ format at sequence $count: $!\n";
+            }
+            if ($identifier_2 !~ /^\@/ or $identifier2_2 !~ /^\+/){
+                die "Input file 2 doesn't seem to be in FastQ format at sequence $count: $!\n";
+            }
+        }
+        $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+        chomp $identifier_1;
+        chomp $sequence_1;
+        chomp $sequence_2;
+        chomp $quality_score_1;
+        chomp $quality_score_2;
+        $identifier_1 =~ s/^\@//;
+        $identifier_1 =~ s/$/\/1/; #adding an extra /1 to the end which is being removed by Bowtie otherwise (which leads to no sequences alignments whatsoever)
+        if ($skip){
+            next unless ($count > $skip);
+        }
+        if ($upto){
+            last if ($count > $upto);
+        }
+        $sequence_1 = uc$sequence_1; # make input file 1 case insensitive
+        $sequence_2 = uc$sequence_2; # make input file 2 case insensitive
+        # print "$identifier_1\t$sequence_1\t$quality_score_1\t$sequence_2\t$quality_score_2\n";
+        my $sequence_1_C_to_T = $sequence_1;
+        my $sequence_2_G_to_A = $sequence_2;
+        $sequence_1_C_to_T =~ tr/C/T/;
+        $sequence_2_G_to_A =~ tr/G/A/;
+        print CTPLUSGA "$identifier_1\t$sequence_1_C_to_T\t$quality_score_1\t$sequence_2_G_to_A\t$quality_score_2\n";
+        unless ($directional){
+            my $sequence_1_G_to_A = $sequence_1;
+            my $sequence_2_C_to_T = $sequence_2;
+            $sequence_1_G_to_A =~ tr/G/A/;
+            $sequence_2_C_to_T =~ tr/C/T/;
+            print GAPLUSCT "$identifier_1\t$sequence_1_G_to_A\t$quality_score_1\t$sequence_2_C_to_T\t$quality_score_2\n";
+        }
+    }
+    close CTPLUSGA or die "Couldn't close filehandle\n";
+    warn "\nCreated C -> T converted version of FastQ file '$file_1' and G -> A converted version of FastQ file '$file_2' ($count sequences in total)\n";
+    if ($directional){
+        warn "\n";
+        return ($CT_plus_GA_infile);
+    }
+    else{
+        close GAPLUSCT or die "Couldn't close filehandle\n";
+        warn "Created G -> A converted version of FastQ file '$file_1' and C -> T converted version of FastQ file '$file_2' ($count sequences in total)\n\n";
+        return ($CT_plus_GA_infile,$GA_plus_CT_infile);
+    }
+}
+sub fix_IDs{
+    ### Returns a copy of the read ID in which every run of spaces and/or tabs is collapsed into a single underscore.
+    ### Everything else (including a trailing newline) is left untouched.
+    my ($read_id) = @_;
+    ### the limit of -1 keeps trailing empty fields, so whitespace at the very end of the ID is converted as well
+    return join ('_', split (/[ \t]+/, $read_id, -1));
+}
+sub ensure_sensical_alignment_orientation_single_end{
+    ### Decides whether a single-end alignment has a strand that makes sense for the alignment process which produced it.
+    ###
+    ### Arguments: $index  - position of the alignment process within the file-level array @fhs
+    ###            $strand - strand of the alignment, '+' or '-'
+    ### Returns 1 and increments $fhs[$index]->{seen} if the strand is the sensible one for that process.
+    ### Returns 0 otherwise; $fhs[$index]->{wrong_strand} is incremented if the strand is the opposite one.
+    ### Any other strand value changes no counter and also returns 0 (previously the sub fell off the end without an explicit return).
+    ### Dies if the name of the alignment process is not one of the four known single-end names.
+    my ($index,$strand) = @_;
+    ### the only strand we accept for each combination of converted read and converted genome
+    my %sensible_strand_for = (
+        CTreadCTgenome => '+', # C->T read against C->T genome: only (+) alignments
+        CTreadGAgenome => '-', # C->T read against G->A genome: only (-) alignments
+        GAreadCTgenome => '-', # G->A read against C->T genome: only (-) alignments
+        GAreadGAgenome => '+', # G->A read against G->A genome: only (+) alignments
+    );
+    my $sensible_strand = $sensible_strand_for{ $fhs[$index]->{name} };
+    unless (defined $sensible_strand){
+        die "One of the above conditions must be true\n";
+    }
+    ### correct orientation: we count it and return 1
+    if ($strand eq $sensible_strand){
+        $fhs[$index]->{seen}++;
+        return 1;
+    }
+    ### the opposite orientation is nonsensical and is counted as such
+    if ($strand eq '+' or $strand eq '-'){
+        $fhs[$index]->{wrong_strand}++;
+    }
+    return 0;
+}
+sub ensure_sensical_alignment_orientation_paired_ends{
+    ### Decides whether a paired-end alignment has an orientation that makes sense for the alignment process which produced it.
+    ###
+    ### Arguments: $index - position of the alignment process within the file-level array @fhs
+    ###            $id_1, $strand_1 - read ID and strand of the first reported alignment line
+    ###            $id_2, $strand_2 - read ID and strand of the second reported alignment line
+    ### The first line always has to be on the (+) and the second line on the (-) strand; what differs between the
+    ### alignment processes is which read (ID ending in 1 or in 2) has to come first.
+    ### Returns 1 and increments {seen} for a sensible pair, returns 0 and increments {wrong_strand} if the reads are swapped.
+    ### Dies for an unknown process name or for any other combination of IDs and strands.
+    my ($index,$id_1,$strand_1,$id_2,$strand_2) = @_;
+    ### number of the read which has to be reported first (on the (+) strand) to make sense
+    my %sensible_first_read = (
+        CTread1GAread2CTgenome => 1, # [Index 0] CT converted read 1, GA converted read 2, CT converted genome
+        GAread1CTread2GAgenome => 1, # [Index 1] GA converted read 1, CT converted read 2, GA converted genome
+        GAread1CTread2CTgenome => 2, # [Index 2] GA converted read 1, CT converted read 2, CT converted genome
+        CTread1GAread2GAgenome => 2, # [Index 3] CT converted read 1, GA converted read 2, GA converted genome
+    );
+    my $expected_first = $sensible_first_read{ $fhs[$index]->{name} }
+        or die "One of the above conditions must be true\n";
+    my $plus_then_minus  = ($strand_1 eq '+' and $strand_2 eq '-');
+    my $read_1_then_read_2 = ($plus_then_minus and $id_1 =~ /1$/ and $id_2 =~ /2$/);
+    my $read_2_then_read_1 = ($plus_then_minus and $id_1 =~ /2$/ and $id_2 =~ /1$/);
+    unless ($read_1_then_read_2 or $read_2_then_read_1){
+        die "id1: $id_1\tid2: $id_2\tThis should be impossible\n";
+    }
+    my $is_sensible = ($expected_first == 1) ? $read_1_then_read_2 : $read_2_then_read_1;
+    if ($is_sensible){
+        $fhs[$index]->{seen}++;
+        return 1;
+    }
+    ### the reads are in the swapped order, i.e. the alignment is nonsensical
+    $fhs[$index]->{wrong_strand}++;
+    return 0;
+}
+#####################################################################################################################################################
+### Bowtie 1 (default) | PAIRED-END | FASTA
+sub paired_end_align_fragments_to_bisulfite_genome_fastA {
+### Starts one Bowtie (1) paired-end process per entry of the file-level array @fhs and primes each entry with
+### the first alignment that process reports.
+### The four arguments (names of the converted FastA files) are only used for the status messages below; the
+### files which are actually aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2} (prefixed with $temp_dir).
+### For every entry this sets:
+###   {fh}                         - read handle of the pipe from Bowtie
+###   {last_line_1}, {last_line_2} - the first two output lines (chomped), or undef if there were none
+###   {last_seq_id}                - ID of read 1 of that pair with its trailing /1 removed, or undef
+### Dies if the pipe can't be opened or if neither of the first two IDs ends in /1.
+my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+if ($directional){
+warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
+}
+else{
+warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+}
+## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
+## data structure above
+if ($directional){
+warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+else{
+warn "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+foreach my $fh (@fhs) {
+### in directional mode entries without an input file are not aligned at all, they are just initialised as empty
+if ($directional){
+unless ($fh->{inputfile_1}){
+	$fh->{last_seq_id} = undef;
+	$fh->{last_line_1} = undef;
+	$fh->{last_line_2} = undef;
+	next;
+}
+}
+### the user supplied options are extended by a strand restriction which depends on the alignment process
+my $bt_options = $bowtie_options;
+if ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome'){
+$bt_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+}
+else {
+$bt_options .= ' --nofw';
+}
+warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt_options)\n";
+### the command is run through the shell and its output is read via $fh->{fh}
+open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+### a paired-end alignment is reported as two consecutive lines
+my $line_1 = $fh->{fh}->getline();
+my $line_2 = $fh->{fh}->getline();
+# if Bowtie produces an alignment we store the first line of the output
+if ($line_1 and $line_2) {
+chomp $line_1;
+chomp $line_2;
+my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first bowtie output line (= the sequence identifier)
+my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second bowtie output line
+### Bowtie always reports the alignment with the smaller chromosomal position first. This can be either sequence 1 or sequence 2.
+### We will thus identify which sequence was read 1 and store this ID as last_seq_id
+if ($id_1 =~ s/\/1$//){ # removing the read 1 tag if present
+	$fh->{last_seq_id} = $id_1;
+}
+elsif ($id_2 =~ s/\/1$//){ # removing the read 1 tag if present
+	$fh->{last_seq_id} = $id_2;
+}
+else{
+	die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+}
+$fh->{last_line_1} = $line_1; # this contains either read 1 or read 2
+$fh->{last_line_2} = $line_2; # this contains either read 1 or read 2
+warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
+}
+# otherwise we just initialise last_seq_id and last_lines as undefined
+else {
+warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+$fh->{last_seq_id} = undef;
+$fh->{last_line_1} = undef;
+$fh->{last_line_2} = undef;
+}
+}
+}
+### Bowtie 2 | PAIRED-END | FASTA
+sub paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
+    ### Starts one Bowtie 2 paired-end process per entry of the file-level array @fhs and primes each entry with
+    ### the first alignment record that process reports.
+    ### The four arguments (names of the converted FastA files) are only used for the status messages; the files
+    ### which are actually aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2} (prefixed with $temp_dir).
+    ### For every entry this sets:
+    ###   {fh}                         - read handle of the pipe from Bowtie 2
+    ###   {last_line_1}, {last_line_2} - the first two non-header output lines (chomped), or undef if there were none
+    ###   {last_seq_id}                - ID of read 1 of that pair with its trailing /1 removed, or undef
+    ### Dies if the pipe can't be opened or if neither of the first two IDs ends in /1 (same behaviour as the
+    ### other paired-end alignment subs).
+    my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+    if ($directional){
+        warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
+    }
+    else{
+        warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+    }
+    ## Now starting up to 4 instances of Bowtie 2 feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
+    ## data structure above
+    if ($directional){
+        warn "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    }
+    else{
+        warn "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    }
+    foreach my $fh (@fhs) {
+        ### in directional mode entries without an input file are not aligned at all, they are just initialised as empty
+        if ($directional){
+            unless ($fh->{inputfile_1}){
+                $fh->{last_seq_id} = undef;
+                $fh->{last_line_1} = undef;
+                $fh->{last_line_2} = undef;
+                next;
+            }
+        }
+        my $bt2_options = $bowtie_options;
+        if ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome'){
+            $bt2_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+        }
+        else {
+            $bt2_options .= ' --nofw';
+        }
+        warn "Now starting a Bowtie 2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt2_options))\n";
+        open ($fh->{fh},"$path_to_bowtie $bt2_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+        ### Bowtie 2 writes SAM format, so we need to skip everything until the first sequence
+        while (1){
+            $_ = $fh->{fh}->getline();
+            if ($_) {
+                last unless ($_ =~ /^\@/); # SAM headers start with @
+            }
+            else{
+                last; # no alignment output
+            }
+        }
+        my $line_1 = $_;
+        my $line_2 = $fh->{fh}->getline();
+        # if Bowtie produces an alignment we store the first line of the output
+        if ($line_1 and $line_2) {
+            chomp $line_1;
+            chomp $line_2;
+            my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first bowtie output line (= the sequence identifier)
+            my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second bowtie output line
+            ### Bowtie always reports the alignment with the smaller chromosomal position first. This can be either sequence 1 or sequence 2.
+            ### We will thus identify which sequence was read 1 and store this ID as last_seq_id
+            if ($id_1 =~ s/\/1$//){ # removing the read 1 /1 tag if present (remember that Bowtie2 clips off /1 or /2 line endings itself, so we added /1/1 or /2/2 to start with)
+                $fh->{last_seq_id} = $id_1;
+            }
+            elsif ($id_2 =~ s/\/1$//){ # removing the read 1 /1 tag if present
+                $fh->{last_seq_id} = $id_2;
+            }
+            else{
+                ### without a read 1 there is no ID we could store as last_seq_id, so continuing would leave this
+                ### entry in an inconsistent state; the other paired-end alignment subs treat this as fatal as well
+                die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+            }
+            $fh->{last_line_1} = $line_1; # this contains either read 1 or read 2
+            $fh->{last_line_2} = $line_2; # this contains either read 1 or read 2
+            warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
+        }
+        # otherwise we just initialise last_seq_id and last_lines as undefined
+        else {
+            warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+            $fh->{last_seq_id} = undef;
+            $fh->{last_line_1} = undef;
+            $fh->{last_line_2} = undef;
+        }
+    }
+}
+### Bowtie 1 (default) | PAIRED-END | FASTQ
+sub paired_end_align_fragments_to_bisulfite_genome_fastQ {
+### Starts one Bowtie (1) paired-end process per entry of the file-level array @fhs and primes each entry with
+### the first alignment that process reports.
+### The four arguments (names of the converted FastQ files) are only used for the status messages below; the
+### files which are actually aligned are taken from $fh->{inputfile_1} (and $fh->{inputfile_2} if $gzip is not set).
+### For every entry this sets:
+###   {fh}                         - read handle of the pipe from Bowtie
+###   {last_line_1}, {last_line_2} - the first two output lines (chomped), or undef if there were none
+###   {last_seq_id}                - ID of read 1 of that pair with its trailing /1 removed, or undef
+### Dies if the pipe can't be opened or if neither of the first two IDs ends in /1.
+my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+if ($directional){
+warn "Input file is $C_to_T_infile_1 (FastQ)\n";
+}
+elsif($pbat){
+warn "Input file is $G_to_A_infile_1 (FastQ; PBAT-Seq)\n";
+}
+else{
+warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 (FastQ)\n";
+}
+## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
+## data structure above
+if ($directional or $pbat){
+warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+else{
+warn "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+foreach my $fh (@fhs) {
+### in directional or PBAT mode entries without an input file are not aligned at all, they are just initialised as empty
+if ($directional or $pbat){
+unless ($fh->{inputfile_1}){
+	$fh->{last_seq_id} = undef;
+	$fh->{last_line_1} = undef;
+	$fh->{last_line_2} = undef;
+	next; # skipping unwanted filehandles
+}
+}
+### the user supplied options are extended by a strand restriction which depends on the alignment process
+my $bt_options = $bowtie_options;
+if ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome'){
+$bt_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+}
+else {
+$bt_options .= ' --nofw';
+}
+if ($gzip){
+### gzipped input: only inputfile_1 is used, it is decompressed with zcat and streamed into Bowtie via '--12 -'
+### NOTE(review): presumably this is the one-line-per-pair file written by biTransformFastQFiles_paired_end_bowtie1_gzip - confirm against the caller
+warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from ${temp_dir}$fh->{inputfile_1}, with the options: $bt_options)\n";
+open ($fh->{fh},"zcat ${temp_dir}$fh->{inputfile_1} | $path_to_bowtie $bt_options $fh->{bisulfiteIndex} --12 - |") or die "Can't open pipe to bowtie: $!";
+}
+else{
+### uncompressed input: read 1 and read 2 are passed to Bowtie as two separate files
+warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from ${temp_dir}$fh->{inputfile_1} and ${temp_dir}$fh->{inputfile_2}, with the options: $bt_options))\n";
+sleep(5);
+open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+}
+### a paired-end alignment is reported as two consecutive lines
+my $line_1 = $fh->{fh}->getline();
+my $line_2 = $fh->{fh}->getline();
+# if Bowtie produces an alignment we store the first line of the output
+if ($line_1 and $line_2) {
+chomp $line_1;
+chomp $line_2;
+### Bowtie always reports the alignment with the smaller chromosomal position first. This can be either sequence 1 or sequence 2.
+### We will thus identify which sequence was read 1 and store this ID as last_seq_id
+my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first bowtie output line (= the sequence identifier)
+my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second bowtie output line
+if ($id_1 =~ s/\/1$//){ # removing the read 1 tag if present
+	$fh->{last_seq_id} = $id_1;
+}
+elsif ($id_2 =~ s/\/1$//){ # removing the read 1 tag if present
+	$fh->{last_seq_id} = $id_2;
+}
+else{
+	die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+}
+$fh->{last_line_1} = $line_1; # this contains read 1 or read 2
+$fh->{last_line_2} = $line_2; # this contains read 1 or read 2
+warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
+}
+# otherwise we just initialise last_seq_id and last_lines as undefined
+else {
+warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+$fh->{last_seq_id} = undef;
+$fh->{last_line_1} = undef;
+$fh->{last_line_2} = undef;
+}
+}
+}
+### Bowtie 2 | PAIRED-END | FASTQ
+sub paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
+### Starts one Bowtie 2 paired-end process per entry of the file-level array @fhs and primes each entry with
+### the first alignment record that process reports.
+### The four arguments (names of the converted FastQ files) are only used for the status messages below; the
+### files which are actually aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2} (prefixed with $temp_dir).
+### For every entry this sets:
+###   {fh}                         - read handle of the pipe from Bowtie 2
+###   {last_line_1}, {last_line_2} - the first two non-header output lines (chomped), or undef if there were none
+###   {last_seq_id}                - ID of read 1 of that pair with its trailing /1 removed, or undef
+### Dies if the pipe can't be opened or if neither of the first two IDs ends in /1.
+my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+if ($directional){
+warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n";
+}
+else{
+warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
+}
+## Now starting up to 4 instances of Bowtie 2 feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
+## data structure above
+if ($directional){
+warn "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+else{
+warn "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+}
+foreach my $fh (@fhs) {
+### in directional mode entries without an input file are not aligned at all, they are just initialised as empty
+if ($directional){
+unless ($fh->{inputfile_1}){
+	$fh->{last_seq_id} = undef;
+	$fh->{last_line_1} = undef;
+	$fh->{last_line_2} = undef;
+	next;
+}
+}
+### the user supplied options are extended by a strand restriction which depends on the alignment process
+my $bt2_options = $bowtie_options;
+if ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome'){
+$bt2_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+}
+else {
+$bt2_options .= ' --nofw';
+}
+warn "Now starting a Bowtie 2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt2_options))\n";
+### the command is run through the shell and its output is read via $fh->{fh}
+open ($fh->{fh},"$path_to_bowtie $bt2_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+### Bowtie 2 writes SAM format, so we need to skip everything until the first sequence
+### (note that this loop uses the global $_, which afterwards holds the first non-header line or a false value)
+while (1){
+$_ = $fh->{fh}->getline();
+if ($_) {
+	last unless ($_ =~ /^\@/); # SAM headers start with @
+}
+else{
+	last; # no alignment output
+}
+}
+my $line_1 = $_;
+my $line_2 = $fh->{fh}->getline();
+# if Bowtie produces an alignment we store the first line of the output
+if ($line_1 and $line_2) {
+chomp $line_1;
+chomp $line_2;
+### Bowtie always reports the alignment with the smaller chromosomal position first. This can be either sequence 1 or sequence 2.
+### We will thus identify which sequence was read 1 and store this ID as last_seq_id
+my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first bowtie output line (= the sequence identifier)
+my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second bowtie output line
+if ($id_1 =~ s/\/1$//){ # removing the read 1 tag if present (remember that Bowtie2 clips off /1 or /2 line endings itself, so we added /1/1 or /2/2 to start with)
+	$fh->{last_seq_id} = $id_1;
+}
+elsif ($id_2 =~ s/\/1$//){ # removing the read 1 tag if present
+	$fh->{last_seq_id} = $id_2;
+}
+else{
+	die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
+}
+$fh->{last_line_1} = $line_1; # this contains read 1 or read 2
+$fh->{last_line_2} = $line_2; # this contains read 1 or read 2
+warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
+}
+# otherwise we just initialise last_seq_id and last_lines as undefined
+else {
+warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+$fh->{last_seq_id} = undef;
+$fh->{last_line_1} = undef;
+$fh->{last_line_2} = undef;
+}
+}
+}
+#####################################################################################################################################################
+### Bowtie 1 (default) | SINGLE-END | FASTA
+sub single_end_align_fragments_to_bisulfite_genome_fastA {
+    ### Starts one Bowtie (1) single-end process per entry of the file-level array @fhs and primes each entry with
+    ### the first alignment line that process reports.
+    ### The two arguments (names of the converted FastA files) are only used for the status messages; the file
+    ### which is actually aligned is $temp_dir followed by $fh->{inputfile} (streamed through zcat if $gzip is set).
+    ### For every entry this sets {fh} (pipe from Bowtie), {last_line} (first output line, chomped) and
+    ### {last_seq_id} (first tab-delimited field of that line); the latter two are undef without any output.
+    ### Dies if the pipe to Bowtie can't be opened.
+    my ($C_to_T_infile,$G_to_A_infile) = @_;
+    warn $directional
+        ? "Input file is $C_to_T_infile (FastA)\n"
+        : "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+    ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files
+    warn $directional
+        ? "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+        : "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    foreach my $fh (@fhs) {
+        ### restricting Bowtie to the one strand which can produce a sensible alignment for this process
+        my $same_conversion = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
+        my $bt_options = $bowtie_options;
+        $bt_options .= $same_conversion ? ' --norc' : ' --nofw';
+        warn "Now starting the Bowtie aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt_options)\n";
+        ### gzipped input is decompressed by zcat and read by Bowtie from STDIN, uncompressed input is passed as a file name
+        my $command = $gzip
+            ? "zcat $temp_dir$fh->{inputfile} | $path_to_bowtie $bt_options $fh->{bisulfiteIndex} - |"
+            : "$path_to_bowtie $bt_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |";
+        open ($fh->{fh},$command) or die "Can't open pipe to bowtie: $!";
+        $_ = $fh->{fh}->getline();
+        ### no output at all: initialise last_seq_id and last_line as undefined
+        unless ($_) {
+            warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
+            $fh->{last_seq_id} = undef;
+            $fh->{last_line} = undef;
+            next;
+        }
+        ### otherwise store the first line of the output together with its sequence identifier (= first column)
+        chomp $_;
+        my ($first_column) = split (/\t/,$_);
+        $fh->{last_seq_id} = $first_column;
+        $fh->{last_line} = $_;
+        warn "Found first alignment:\t$fh->{last_line}\n";
+    }
+}
+### Bowtie 2 | SINGLE-END | FASTA
+sub single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
+    ### Starts one Bowtie 2 single-end process per entry of the file-level array @fhs and primes each entry with
+    ### the first non-header output line that process reports.
+    ### The two arguments (names of the converted FastA files) are only used for the status messages; the file
+    ### which is actually aligned is $temp_dir followed by $fh->{inputfile}.
+    ### For every entry this sets {fh} (pipe from Bowtie 2), {last_line} (first non-header line, chomped) and
+    ### {last_seq_id} (first tab-delimited field of that line); the latter two are undef without any such line.
+    ### Dies if the pipe to Bowtie 2 can't be opened.
+    my ($C_to_T_infile,$G_to_A_infile) = @_;
+    warn $directional
+        ? "Input file is $C_to_T_infile (FastA)\n"
+        : "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+    ## Now starting up to 4 instances of Bowtie 2 feeding in the converted sequence files
+    warn $directional
+        ? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n"
+        : "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    foreach my $fh (@fhs) {
+        ### restricting Bowtie 2 to the one strand which can produce a sensible alignment for this process
+        my $same_conversion = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
+        my $bt2_options = $bowtie_options;
+        $bt2_options .= $same_conversion ? ' --norc' : ' --nofw';
+        warn "Now starting the Bowtie 2 aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt2_options)\n";
+        open ($fh->{fh},"$path_to_bowtie $bt2_options $fh->{bisulfiteIndex} -U $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+        ### Bowtie 2 writes SAM format: keep reading while we get header lines (they start with @);
+        ### we stop at the first other line or when there is no more output
+        do {
+            $_ = $fh->{fh}->getline();
+        } while ($_ and $_ =~ /^\@/);
+        ### no result line at all: initialise last_seq_id and last_line as undefined
+        unless ($_) {
+            warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
+            $fh->{last_seq_id} = undef;
+            $fh->{last_line} = undef;
+            next;
+        }
+        ### Bowtie 2 outputs a result line even for sequences without any alignments, so this is simply the first
+        ### result line; we store it together with its sequence identifier (= first column)
+        chomp $_;
+        my ($first_column) = split (/\t/,$_);
+        $fh->{last_seq_id} = $first_column;
+        $fh->{last_line} = $_;
+        warn "Found first alignment:\t$fh->{last_line}\n";
+    }
+}
+### Bowtie 1 (default) | SINGLE-END | FASTQ
+sub single_end_align_fragments_to_bisulfite_genome_fastQ {
+    ### Starts one Bowtie (1) single-end process per entry of the file-level array @fhs and primes each entry with
+    ### the first alignment line that process reports.
+    ### The two arguments (names of the converted FastQ files) are only used for the status messages; the file
+    ### which is actually aligned is $temp_dir followed by $fh->{inputfile} (streamed through zcat if $gzip is set).
+    ### For every entry this sets {fh} (pipe from Bowtie), {last_line} (first output line, chomped) and
+    ### {last_seq_id} (first tab-delimited field of that line); the latter two are undef without any output.
+    ### Dies if the pipe to Bowtie can't be opened.
+    my ($C_to_T_infile,$G_to_A_infile) = @_;
+    if ($directional){
+        warn "Input file is $C_to_T_infile (FastQ)\n";
+    }
+    elsif($pbat){
+        warn "Input file is $G_to_A_infile (FastQ)\n";
+    }
+    else{
+        warn "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n";
+    }
+    ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
+    ## the data structure above
+    if ($directional or $pbat){
+        warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    }
+    else{
+        warn "Now running 4 individual instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+    }
+    foreach my $fh (@fhs) {
+        my $bt_options = $bowtie_options;
+        if ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome'){
+            $bt_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
+        }
+        else {
+            $bt_options .= ' --nofw';
+        }
+        warn "Now starting the Bowtie aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt_options)\n";
+        sleep (5);
+        ### Bowtie has to be started with $bt_options (and not with the plain $bowtie_options), otherwise the
+        ### --norc/--nofw restriction announced in the message above is never applied to the alignment
+        if ($gzip){
+            open ($fh->{fh},"zcat $temp_dir$fh->{inputfile} | $path_to_bowtie $bt_options $fh->{bisulfiteIndex} - |") or die "Can't open pipe to bowtie: $!";
+        }
+        else{
+            open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!"; # command for uncompressed data
+        }
+        # if Bowtie produces an alignment we store the first line of the output
+        $_ = $fh->{fh}->getline();
+        if ($_) {
+            chomp;
+            my $id = (split(/\t/))[0]; # this is the first element of the Bowtie output (= the sequence identifier)
+            $fh->{last_seq_id} = $id;
+            $fh->{last_line} = $_;
+            warn "Found first alignment:\t$fh->{last_line}\n";
+        }
+        # otherwise we just initialise last_seq_id and last_line as undefined
+        else {
+            warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
+            $fh->{last_seq_id} = undef;
+            $fh->{last_line} = undef;
+        }
+    }
+}
+### Bowtie 2 | SINGLE-END | FASTQ
+### Launches one Bowtie 2 process per entry in @fhs, skips the SAM header of its output and keeps the
+### first remaining line in that entry (keys: fh, last_seq_id, last_line).
+### The two arguments (names of the C->T and G->A converted files) are only used for the messages;
+### the file that is aligned is taken from $fh->{inputfile}. Dies if the pipe cannot be opened.
+sub single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
+  my ($C_to_T_infile,$G_to_A_infile) = @_;
+
+  ## Up to 4 instances of Bowtie 2 are started below; the first line of each output is stored in @fhs
+  if ($directional){
+    warn "Input file is $C_to_T_infile (FastQ)\n\n";
+    warn "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  }
+  else{
+    warn "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n";
+    warn "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
+  }
+
+  foreach my $instance (@fhs) {
+    ### only one read orientation is requested per instance so that alignments are reported in a sensible manner
+    my $same_conversion = ($instance->{name} eq 'CTreadCTgenome' or $instance->{name} eq 'GAreadGAgenome');
+    my $aligner_options = $bowtie_options . ($same_conversion ? ' --norc' : ' --nofw');
+
+    warn "Now starting the Bowtie 2 aligner for $instance->{name} (reading in sequences from $temp_dir$instance->{inputfile} with options $aligner_options)\n";
+    warn "Using Bowtie 2 index: $instance->{bisulfiteIndex}\n\n";
+
+    open ($instance->{fh},"$path_to_bowtie $aligner_options $instance->{bisulfiteIndex} -U $temp_dir$instance->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+
+    ### Bowtie 2 writes SAM format, so lines are read until one is found that is not a header line
+    ### (SAM headers start with @) or until the output is exhausted
+    do {
+      $_ = $instance->{fh}->getline();
+    } while ($_ and $_ =~ /^\@/);
+
+    # No line left at all. This should only happen at the end of a file for Bowtie 2 output
+    unless ($_) {
+      warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      @{$instance}{qw(last_seq_id last_line)} = (undef, undef);
+      next;
+    }
+
+    # Bowtie 2 outputs a result line even for sequences without any alignments, so the first line is always stored
+    chomp;
+    my ($id) = split(/\t/, $_); # the first element of the Bowtie 2 output is the sequence identifier
+    @{$instance}{qw(last_seq_id last_line)} = ($id, $_);
+    warn "Found first alignment:\t$instance->{last_line}\n";
+  }
+}
+###########################################################################################################################################
+### Sets every entry of %counting back to 0 and rebuilds @fhs for the next input file.
+### Argument: the file name to be processed next; a name containing a comma is treated as a
+### paired-end entry. Which alignment instances end up in @fhs depends on $directional and $pbat.
+sub reset_counters_and_fhs{
+  my $filename = shift;
+
+  # Strand related counters:
+  #   CT_CT_count    (CT read/CT genome, original top strand)
+  #   CT_GA_count    (CT read/GA genome, original bottom strand)
+  #   GA_CT_count    (GA read/CT genome, complementary to original top strand)
+  #   GA_GA_count    (GA read/GA genome, complementary to original bottom strand)
+  #   CT_GA_CT_count (CT read1/GA read2/CT genome, original top strand)
+  #   GA_CT_GA_count (GA read1/CT read2/GA genome, complementary to original bottom strand)
+  #   GA_CT_CT_count (GA read1/CT read2/CT genome, complementary to original top strand)
+  #   CT_GA_GA_count (CT read1/GA read2/GA genome, original bottom strand)
+  #   alignments_rejected_count is only relevant if --directional was specified
+  my @counter_names = (
+    'total_meCHH_count',
+    'total_meCHG_count',
+    'total_meCpG_count',
+    'total_unmethylated_CHH_count',
+    'total_unmethylated_CHG_count',
+    'total_unmethylated_CpG_count',
+    'sequences_count',
+    'no_single_alignment_found',
+    'unsuitable_sequence_count',
+    'genomic_sequence_could_not_be_extracted_count',
+    'unique_best_alignment_count',
+    'low_complexity_alignments_overruled_count',
+    'CT_CT_count',
+    'CT_GA_count',
+    'GA_CT_count',
+    'GA_GA_count',
+    'CT_GA_CT_count',
+    'GA_CT_GA_count',
+    'GA_CT_CT_count',
+    'CT_GA_GA_count',
+    'alignments_rejected_count',
+  );
+  %counting = ();
+  $counting{$_} = 0 foreach (@counter_names);
+
+  # strand identity and bisulfite index belonging to each of the four possible alignment instances
+  my %instance_details = (
+    CTreadCTgenome => { strand_identity => 'con ori forward',       bisulfiteIndex => $CT_index_basename },
+    CTreadGAgenome => { strand_identity => 'con ori reverse',       bisulfiteIndex => $GA_index_basename },
+    GAreadCTgenome => { strand_identity => 'compl ori con forward', bisulfiteIndex => $CT_index_basename },
+    GAreadGAgenome => { strand_identity => 'compl ori con reverse', bisulfiteIndex => $GA_index_basename },
+  );
+
+  # All four instances are used unless a single-end file of a directional or PBAT library is processed
+  my @selected = ('CTreadCTgenome','CTreadGAgenome','GAreadCTgenome','GAreadGAgenome');
+  if ($directional){
+    @selected = ('CTreadCTgenome','CTreadGAgenome') unless ($filename =~ ','); # single-end files
+  }
+  elsif($pbat){
+    @selected = ('GAreadCTgenome','GAreadGAgenome') unless ($filename =~ ','); # single-end files
+  }
+
+  @fhs = ();
+  foreach my $name (@selected){
+    push @fhs, {
+      name            => $name,
+      strand_identity => $instance_details{$name}->{strand_identity},
+      bisulfiteIndex  => $instance_details{$name}->{bisulfiteIndex},
+      seen            => 0,
+      wrong_strand    => 0,
+    };
+  }
+}
+### Parses the command line with Getopt::Long, checks the options against each other and against the
+### selected aligner (Bowtie 1 by default, Bowtie 2 with --bowtie2) and joins all aligner options
+### into a single string.
+###
+### Side effects of this subroutine:
+###   - @ARGV is consumed (options first, then the genome folder, the rest are single-end file names)
+###   - the file-scoped @filenames is filled ('mate1,mate2' entries for paired-end mode)
+###   - the working directory is changed several times (genome folder, index folders, output and
+###     temp directory); the last chdir goes to the temp directory if one was given, otherwise to $parent_dir
+###   - missing output/temp directories are created
+###   - --help and --version print their text and exit; a missing genome folder prints the help and exits
+###   - any invalid or incompatible option terminates the program via die
+###
+### Returns a list of 27 values in this order:
+###   genome folder (absolute path), C->T index basename, G->A index basename, Bowtie command,
+###   sequence format ('FASTA' or 'FASTQ'), Bowtie option string, directional (1/0), unmapped, ambiguous,
+###   phred64, solexa, output directory ('' if none), bowtie2, vanilla, sam_no_hd, skip, upto,
+###   temp directory ('' if none), non_bs_mm, insertion open/extend penalty, deletion open/extend penalty,
+###   gzip, bam (1 = Samtools available, 2 = fall back to GZIP compression), Samtools path, pbat
+sub process_command_line{
+  my @bowtie_options;
+  my $help;
+  my $mates1;
+  my $mates2;
+  my $path_to_bowtie;
+  my $fastq;
+  my $fasta;
+  my $skip;
+  my $qupto;
+  my $phred64;
+  my $phred33;
+  my $solexa;
+  my $mismatches;
+  my $seed_length;
+  my $best;
+  my $sequence_format;
+  my $version;
+  my $quiet;
+  my $chunk;
+  my $non_directional;
+  my $ceiling;
+  my $maxins;
+  my $minins;
+  my $unmapped;
+  my $multi_map;
+  my $output_dir;
+  my $bowtie2;
+  my $vanilla;
+  my $sam_no_hd;
+  my $seed_extension_fails;
+  my $reseed_repetitive_seeds;
+  my $most_valid_alignments;
+  my $score_min;
+  my $parallel;
+  my $temp_dir;
+  my $rdg;
+  my $rfg;
+  my $non_bs_mm;
+  my $samtools_path;
+  my $bam;
+  my $gzip;
+  my $pbat;
+
+  # GetOptions returns false if any option could not be processed
+  my $command_line = GetOptions ('help|man' => \$help,
+				 '1=s' => \$mates1,
+				 '2=s' => \$mates2,
+				 'path_to_bowtie=s' => \$path_to_bowtie,
+				 'f|fasta' => \$fasta,
+				 'q|fastq' => \$fastq,
+				 's|skip=i' => \$skip,
+				 'u|upto=i' => \$qupto,
+				 'phred33-quals' => \$phred33,
+				 'phred64-quals|solexa1' => \$phred64,
+				 'solexa-quals' => \$solexa,
+				 'n|seedmms=i' => \$mismatches,
+				 'l|seedlen=i' => \$seed_length,
+				 'no_best' => \$best,
+				 'version' => \$version,
+				 'quiet' => \$quiet,
+				 'chunkmbs=i' => \$chunk,
+				 'non_directional' => \$non_directional,
+				 'I|minins=i' => \$minins,
+				 'X|maxins=i' => \$maxins,
+				 'e|maqerr=i' => \$ceiling,
+				 'un|unmapped' => \$unmapped,
+				 'ambiguous' => \$multi_map,
+				 'o|output_dir=s' => \$output_dir,
+				 'bowtie2' => \$bowtie2,
+				 'vanilla' => \$vanilla,
+				 'sam-no-hd' => \$sam_no_hd,
+				 'D=i' => \$seed_extension_fails,
+				 'R=i' => \$reseed_repetitive_seeds,
+				 'score_min=s' => \$score_min,
+				 'most_valid_alignments=i' => \$most_valid_alignments,
+				 'p=i' => \$parallel,
+				 'temp_dir=s' => \$temp_dir,
+				 'rdg=s' => \$rdg,
+				 'rfg=s' => \$rfg,
+				 'non_bs_mm' => \$non_bs_mm,
+				 'samtools_path=s' => \$samtools_path,
+				 'bam' => \$bam,
+				 'gzip' => \$gzip,
+				 'pbat' => \$pbat,
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+  ### HELPFILE
+  if ($help){
+    print_helpfile();
+    exit;
+  }
+
+  if ($version){
+    print << "VERSION";
+Bismark - Bisulfite Mapper and Methylation Caller.
+Bismark Version: $bismark_version
+Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
+www.bioinformatics.babraham.ac.uk/projects/
+VERSION
+    exit;
+  }
+
+  ##########################
+  ### PROCESSING OPTIONS ###
+  ##########################
+  unless ($bowtie2){
+    $bowtie2 = 0;
+  }
+  unless ($sam_no_hd){
+    $sam_no_hd =0;
+  }
+
+  ### PATH TO BOWTIE
+  ### if a special path to Bowtie 1/2 was specified we will use that one, otherwise it is assumed that Bowtie 1/2 is in the PATH
+  if ($path_to_bowtie){
+    unless ($path_to_bowtie =~ /\/$/){
+      $path_to_bowtie =~ s/$/\//;
+    }
+    if (-d $path_to_bowtie){
+      if ($bowtie2){
+	$path_to_bowtie = "${path_to_bowtie}bowtie2";
+      }
+      else{
+	$path_to_bowtie = "${path_to_bowtie}bowtie";
+      }
+    }
+    else{
+      die "The path to bowtie provided ($path_to_bowtie) is invalid (not a directory)!\n";
+    }
+  }
+  else{
+    if ($bowtie2){
+      $path_to_bowtie = 'bowtie2';
+      warn "Path to Bowtie 2 specified as: $path_to_bowtie\n";
+    }
+    else{
+      $path_to_bowtie = 'bowtie';
+      warn "Path to Bowtie specified as: $path_to_bowtie\n";
+    }
+  }
+
+  ### OUTPUT REQUESTED AS BAM FILE
+  ### $bam ends up as 1 if Samtools can be used and as 2 if the output has to be compressed with GZIP instead
+  if ($bam){
+    if ($vanilla){
+      die "Specifying BAM output is not compatible with \"--vanilla\" format. Please respecify\n\n";
+    }
+
+    ### PATH TO SAMTOOLS
+    if (defined $samtools_path){
+      # if Samtools was specified as full command
+      if ($samtools_path =~ /samtools$/){
+	if (-e $samtools_path){
+	  # Samtools executable found
+	}
+	else{
+	  die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+	}
+      }
+      # otherwise the path is treated as the folder containing the samtools executable
+      else{
+	unless ($samtools_path =~ /\/$/){
+	  $samtools_path =~ s/$/\//;
+	}
+	$samtools_path .= 'samtools';
+	if (-e $samtools_path){
+	  # Samtools executable found
+	}
+	else{
+	  die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+	}
+      }
+      warn "Alignments will be written out in BAM format. Samtools path provided as: '$samtools_path'\n";
+      $bam = 1;
+    }
+    # Check whether Samtools is in the PATH if no path was supplied by the user
+    else{
+      if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if samtools is in the PATH
+	$samtools_path = `which samtools`;
+	chomp $samtools_path;
+	warn "Alignments will be written out in BAM format. Samtools found here: '$samtools_path'\n";
+	$bam = 1;
+      }
+    }
+
+    unless (defined $samtools_path){
+      $bam = 2;
+      warn "Did not find Samtools on the system. Alignments will be compressed with GZIP instead (.sam.gz)\n";
+    }
+    sleep (1);
+  }
+
+  ####################################
+  ### PROCESSING ARGUMENTS
+
+  ### GENOME FOLDER
+  my $genome_folder = shift @ARGV; # mandatory
+  unless ($genome_folder){
+    warn "Genome folder was not specified!\n";
+    print_helpfile();
+    exit;
+  }
+
+  ### checking that the genome folder, all subfolders and the required bowtie index files exist
+  unless ($genome_folder =~/\/$/){
+    $genome_folder =~ s/$/\//;
+  }
+
+  if (chdir $genome_folder){
+    my $absolute_genome_folder = getcwd; ## making the genome folder path absolute
+    unless ($absolute_genome_folder =~/\/$/){
+      $absolute_genome_folder =~ s/$/\//;
+    }
+    warn "Reference genome folder provided is $genome_folder\t(absolute path is '$absolute_genome_folder')\n";
+    $genome_folder = $absolute_genome_folder;
+  }
+  else{
+    die "Failed to move to $genome_folder: $!\nUSAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>} [<hits>]    (--help for more details)\n";
+  }
+
+  my $CT_dir = "${genome_folder}Bisulfite_Genome/CT_conversion/";
+  my $GA_dir = "${genome_folder}Bisulfite_Genome/GA_conversion/";
+
+  if ($bowtie2){ ### Bowtie 2 (new)
+    ### checking the integrity of $CT_dir
+    chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
+    my @CT_bowtie_index = ('BS_CT.1.bt2','BS_CT.2.bt2','BS_CT.3.bt2','BS_CT.4.bt2','BS_CT.rev.1.bt2','BS_CT.rev.2.bt2');
+    foreach my $file(@CT_bowtie_index){
+      unless (-f $file){
+	die "The Bowtie 2 index of the C->T converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+    ### checking the integrity of $GA_dir
+    chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
+    my @GA_bowtie_index = ('BS_GA.1.bt2','BS_GA.2.bt2','BS_GA.3.bt2','BS_GA.4.bt2','BS_GA.rev.1.bt2','BS_GA.rev.2.bt2');
+    foreach my $file(@GA_bowtie_index){
+      unless (-f $file){
+	die "The Bowtie 2 index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+  }
+  else{ ### Bowtie 1 (default)
+    ### checking the integrity of $CT_dir
+    chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
+    my @CT_bowtie_index = ('BS_CT.1.ebwt','BS_CT.2.ebwt','BS_CT.3.ebwt','BS_CT.4.ebwt','BS_CT.rev.1.ebwt','BS_CT.rev.2.ebwt');
+    foreach my $file(@CT_bowtie_index){
+      unless (-f $file){
+	die "The Bowtie index of the C->T converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+    ### checking the integrity of $GA_dir
+    chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
+    my @GA_bowtie_index = ('BS_GA.1.ebwt','BS_GA.2.ebwt','BS_GA.3.ebwt','BS_GA.4.ebwt','BS_GA.rev.1.ebwt','BS_GA.rev.2.ebwt');
+    foreach my $file(@GA_bowtie_index){
+      unless (-f $file){
+	die "The Bowtie index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+      }
+    }
+  }
+
+  my $CT_index_basename = "${CT_dir}BS_CT";
+  my $GA_index_basename = "${GA_dir}BS_GA";
+
+  ### INPUT OPTIONS
+
+  ### SEQUENCE FILE FORMAT
+  ### exits if both fastA and FastQ were specified
+  if ($fasta and $fastq){
+    die "Only one sequence filetype can be specified (fastA or fastQ)\n";
+  }
+
+  ### unless fastA is specified explicitly, fastQ sequence format is expected by default
+  if ($fasta){
+    print "FastA format specified\n";
+    $sequence_format = 'FASTA';
+    push @bowtie_options, '-f';
+  }
+  elsif ($fastq){
+    print "FastQ format specified\n";
+    $sequence_format = 'FASTQ';
+    push @bowtie_options, '-q';
+  }
+  else{
+    $fastq = 1;
+    print "FastQ format assumed (by default)\n";
+    $sequence_format = 'FASTQ';
+    push @bowtie_options, '-q';
+  }
+
+  ### SKIP
+  ### the value is only reported here and returned to the caller, it is not passed on to Bowtie
+  if ($skip){
+    warn "Skipping the first $skip reads from the input file\n";
+    # push @bowtie_options,"-s $skip";
+  }
+
+  ### UPTO
+  ### the value is only reported here and returned to the caller, it is not passed on to Bowtie
+  if ($qupto){
+    warn "Processing sequences up to read no. $qupto from the input file\n";
+    if ($bowtie2){
+      #      push @bowtie_options,"--upto $qupto"; ## slightly changed for Bowtie 2
+    }
+    else{
+      #     push @bowtie_options,"--qupto $qupto";
+    }
+  }
+
+  ### QUALITY VALUES
+  if (($phred33 and $phred64) or ($phred33 and $solexa) or ($phred64 and $solexa)){
+    die "You can only specify one type of quality value at a time! (--phred33-quals or --phred64-quals or --solexa-quals)";
+  }
+  if ($phred33){ ## if nothing else is specified $phred33 will be used as default by both Bowtie 1 and 2.
+    # Phred quality values work only when -q is specified
+    unless ($fastq){
+      die "Phred quality values work only when -q (FASTQ) is specified\n";
+    }
+    if ($bowtie2){
+      push @bowtie_options,"--phred33";
+    }
+    else{
+      push @bowtie_options,"--phred33-quals";
+    }
+  }
+  if ($phred64){
+    # Phred quality values work only when -q is specified
+    unless ($fastq){
+      die "Phred quality values work only when -q (FASTQ) is specified\n";
+    }
+    if ($bowtie2){
+      push @bowtie_options,"--phred64";
+    }
+    else{
+      push @bowtie_options,"--phred64-quals";
+    }
+  }
+  else{
+    $phred64 = 0;
+  }
+
+  if ($solexa){
+    if ($bowtie2){
+      die "The option '--solexa-quals' is not compatible with Bowtie 2. Please respecify!\n";
+    }
+    # Solexa to Phred value conversion works only when -q is specified
+    unless ($fastq){
+      die "Conversion from Solexa to Phred quality values works only when -q (FASTQ) is specified\n";
+    }
+    push @bowtie_options,"--solexa-quals";
+  }
+  else{
+    $solexa = 0;
+  }
+
+  ### ALIGNMENT OPTIONS
+
+  ### MISMATCHES
+  if (defined $mismatches){
+    if ($bowtie2){
+      if ($mismatches == 0 or $mismatches == 1){
+	push @bowtie_options,"-N $mismatches";
+      }
+      else{
+	die "Please set the number of multiseed mismatches for Bowtie 2 with '-N <int>' (where <int> can be 0 or 1)\n";
+      }
+    }
+    else{
+      if ($mismatches >= 0 and $mismatches <= 3){
+	push @bowtie_options,"-n $mismatches";
+      }
+      else{
+	die "Please set the number of seed mismatches for Bowtie 1 with '-n <int>' (where <int> can be 0,1,2 or 3)\n";
+      }
+    }
+  }
+  else{
+    unless ($bowtie2){
+      push @bowtie_options,"-n 1"; # setting -n to 1 by default (for use with Bowtie only) because it is much quicker than the default mode of -n 2
+    }
+  }
+
+  ### SEED LENGTH
+  if (defined $seed_length){
+    if ($bowtie2){
+      push @bowtie_options,"-L $seed_length";
+    }
+    else{
+      push @bowtie_options,"-l $seed_length";
+    }
+  }
+
+  ### MISMATCH CEILING
+  if (defined $ceiling){
+    die "The option '-e' is not compatible with Bowtie 2. Please respecify options\n" if ($bowtie2);
+    push @bowtie_options,"-e $ceiling";
+  }
+
+  ### BOWTIE 2 EFFORT OPTIONS
+
+  ### CONSECUTIVE SEED EXTENSION FAILS
+  if (defined $seed_extension_fails){
+    die "The option '-D <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    push @bowtie_options,"-D $seed_extension_fails";
+  }
+
+  ### RE-SEEDING REPETITIVE SEEDS
+  if (defined $reseed_repetitive_seeds){
+    die "The option '-R <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    push @bowtie_options,"-R $reseed_repetitive_seeds";
+  }
+
+  ### BOWTIE 2 SCORING OPTIONS
+  if ($score_min){
+    die "The option '--score_min <func>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    unless ($score_min =~ /^L,.+,.+$/){
+      die "The option '--score_min <func>' needs to be in the format <L,value,value> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--score-min $score_min";
+  }
+  else{
+    if ($bowtie2){
+      push @bowtie_options,"--score-min L,0,-0.2"; # default setting, more stringent than normal Bowtie2
+    }
+  }
+
+  ### BOWTIE 2 READ GAP OPTIONS
+  ### the penalties are returned to the caller; 5 (open) and 3 (extend) are used unless --rdg/--rfg were given
+  my ($insertion_open,$insertion_extend,$deletion_open,$deletion_extend);
+
+  if ($rdg){
+    die "The option '--rdg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    if ($rdg =~ /^(\d+),(\d+)$/){
+      $deletion_open = $1;
+      $deletion_extend = $2;
+    }
+    else{
+      die "The option '--rdg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--rdg $rdg";
+  }
+  else{
+    $deletion_open = 5;
+    $deletion_extend = 3;
+  }
+
+  ### BOWTIE 2 REFERENCE GAP OPTIONS
+  if ($rfg){
+    die "The option '--rfg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
+    if ($rfg =~ /^(\d+),(\d+)$/){
+      $insertion_open = $1;
+      $insertion_extend = $2;
+    }
+    else{
+      die "The option '--rfg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
+    }
+    push @bowtie_options,"--rfg $rfg";
+  }
+  else{
+    $insertion_open = 5;
+    $insertion_extend = 3;
+  }
+
+  ### BOWTIE 2 PARALLELIZATION OPTIONS
+  if (defined $parallel){
+    die "The parallelization switch '-p' only works for Bowtie 2. Please respecify!" unless ($bowtie2);
+  }
+  if ($bowtie2){
+    if ($parallel){
+      die "Please select a value for -p of 2 or more!\n" unless ($parallel > 1);
+      push @bowtie_options,"-p $parallel";
+      push @bowtie_options,'--reorder'; ## re-orders the bowtie 2 output so that it does match the input files. This is absolutely required for parallelization to work.
+      print "Each Bowtie 2 instance is going to be run with $parallel threads. Please monitor performance closely and tune down if needed!\n";
+      sleep (2);
+    }
+  }
+
+  ### REPORTING OPTIONS
+  if ($bowtie2){
+    push @bowtie_options,'--ignore-quals'; ## All mismatches will receive penalty for mismatches as if they were of high quality, which is 6 by default
+
+    ### Option -M is deprecated since Bowtie 2 version 2.0.0 beta7. I'll leave this option commented out for a while
+    if(defined $most_valid_alignments){
+      warn "\nThe option -M is now deprecated (as of Bowtie 2 version 2.0.0 beta7). What used to be called -M mode is still the default mode. Use the -D and -R options to adjust the effort expended to find valid alignments.\n\n";
+      #      push @bowtie_options,"-M $most_valid_alignments";sleep (5);
+    }
+    #  else{
+    #    push @bowtie_options,'-M 10';    # the default behavior for Bowtie 2 is to report (and sort) up to 500 alignments for a given sequence
+    #  }
+  }
+  else{ # Because of the way Bismark works we will always use the reporting option -k 2 (report up to 2 valid alignments) for Bowtie 1
+    push @bowtie_options,'-k 2';
+  }
+
+  ### --BEST
+  ### note that $best holds the value of --no_best, i.e. it is true if --best is to be switched off
+  if ($bowtie2){
+    if ($best){    # Bowtie 2 does away with the concept of --best, so one can also not select --no-best when Bowtie 2 is to be used
+      die "The option '--no-best' is not compatible with Bowtie 2. Please respecify options\n";
+    }
+  }
+  else{
+    # --best is the default option for Bowtie 1, specifying --no-best can turn it off (e.g. to speed up alignment process)
+    unless ($best){
+      push @bowtie_options,'--best';
+    }
+  }
+
+  ### VANILLA BISMARK (BOWTIE 1) OUTPUT
+  if ($vanilla){
+    if ($bowtie2){
+      die "The options --bowtie2 and the --vanilla are not compatible. Please respecify!\n\n";
+    }
+  }
+  else{
+    $vanilla = 0;
+  }
+
+  ### PAIRED-END MAPPING
+  if ($mates1){
+    my @mates1 = (split (/,/,$mates1));
+    die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n" unless ($mates2);
+    my @mates2 = (split(/,/,$mates2));
+    unless (scalar @mates1 == scalar @mates2){
+      die "Paired-end mapping requires the same amount of mate1 and mate2 files, please respecify! (format: -1 <mates1> -2 <mates2>)\n";
+    }
+    # pairing up the files in the order they were given; each pair is stored as 'mate1,mate2'
+    while (1){
+      my $mate1 = shift @mates1;
+      my $mate2 = shift @mates2;
+      last unless ($mate1 and $mate2);
+      push @filenames,"$mate1,$mate2";
+    }
+    if ($bowtie2){
+      push @bowtie_options,'--no-mixed';     ## By default Bowtie 2 is not looking for single-end alignments if it can't find concordant or discordant alignments
+      push @bowtie_options,'--no-discordant';## By default Bowtie 2 is not looking for discordant alignments if it can't find concordant ones
+    }
+  }
+  elsif ($mates2){
+    die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n";
+  }
+
+  ### SINGLE-END MAPPING
+  # Single-end mapping will be performed if no mate pairs for paired-end mapping have been specified
+  my $singles;
+  unless ($mates1 and $mates2){
+    $singles = join (',',@ARGV);
+    unless ($singles){
+      die "\nNo filename supplied! Please specify one or more files for single-end Bismark mapping!\n";
+    }
+    $singles =~ s/\s/,/g;
+    @filenames = (split(/,/,$singles));
+    warn "\nFiles to be analysed:\n";
+    warn "@filenames\n\n";
+    sleep (3);
+  }
+
+  ### MINIMUM INSERT SIZE (PAIRED-END ONLY)
+  if (defined $minins){
+    die "-I/--minins can only be used for paired-end mapping!\n\n" if ($singles);
+    push @bowtie_options,"--minins $minins";
+  }
+
+  ### MAXIMUM INSERT SIZE (PAIRED-END ONLY)
+  if (defined $maxins){
+    die "-X/--maxins can only be used for paired-end mapping!\n\n" if ($singles);
+    push @bowtie_options,"--maxins $maxins";
+  }
+  else{
+    unless ($singles){
+      push @bowtie_options,'--maxins 500';
+    }
+  }
+
+  ### QUIET prints nothing  besides alignments (suppresses warnings)
+  if ($quiet){
+    push @bowtie_options,'--quiet';
+  }
+
+  ### CHUNKMBS needed to be increased to avoid memory exhaustion warnings for Bowtie 1, particularly for --best (and paired-end) alignments
+  unless ($bowtie2){ # Bowtie 2 does not have a chunkmbs option
+    if (defined $chunk){
+      push @bowtie_options,"--chunkmbs $chunk";
+    }
+    else{
+      push @bowtie_options,'--chunkmbs 512'; ## setting the default to 512MB (up from 64 default)
+    }
+  }
+
+  ### SUMMARY OF ALL BOWTIE OPTIONS
+  my $bowtie_options = join (' ',@bowtie_options);
+
+  ### STRAND-SPECIFIC LIBRARIES
+  ### $directional is 1 by default and 0 for both --non_directional and --pbat
+  my $directional;
+  if ($non_directional){
+    die "A library can only be specified to be either non-directional or a PBAT-Seq library. Please respecify!\n\n" if ($pbat);
+    warn "Library was specified to be not strand-specific (non-directional), therefore alignments to all four possible bisulfite strands (OT, CTOT, OB and CTOB) will be reported\n";
+    sleep (3);
+    $directional = 0;
+  }
+  elsif($pbat){
+    die "The option --pbat is currently not compatible with --gzip. Please run alignments with uncompressed temporary files, i.e. lose the option --gzip\n" if ($gzip);
+    die "The option --pbat is currently not working for Bowtie 2. Please run alignments in default (i.e. Bowtie 1) mode!\n" if ($bowtie2);
+    die "The option --pbat is currently only working with FastQ files. Please respecify (i.e. lose the option -f)!\n" if ($fasta);
+    warn "Library was specified as PBAT-Seq (Post-Bisulfite Adapter Tagging), only performing alignments to the complementary strands (CTOT and CTOB)\n";
+    sleep (3);
+    $directional = 0;
+  }
+  else{
+    warn "Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!)\n";
+    sleep (3);
+    $directional = 1; # default behaviour
+  }
+
+  ### UNMAPPED SEQUENCE OUTPUT
+  $unmapped = 0 unless ($unmapped);
+
+  ### AMBIGUOUS ALIGNMENT SEQUENCE OUTPUT
+  $multi_map = 0 unless ($multi_map);
+
+  ### OUTPUT DIRECTORY
+  ### relative paths are resolved against the directory Bismark was started from
+  chdir $parent_dir or die "Failed to move back to current working directory\n";
+  if ($output_dir){
+    unless ($output_dir =~ /\/$/){
+      $output_dir =~ s/$/\//;
+    }
+
+    if (chdir $output_dir){
+      $output_dir = getcwd; #  making the path absolute
+      unless ($output_dir =~ /\/$/){
+	$output_dir =~ s/$/\//;
+      }
+    }
+    else{
+      mkdir $output_dir or die "Unable to create directory $output_dir $!\n";
+      warn "Created output directory $output_dir!\n\n";
+      chdir $output_dir or die "Failed to move to $output_dir\n";
+      $output_dir = getcwd; #  making the path absolute
+      unless ($output_dir =~ /\/$/){
+	$output_dir =~ s/$/\//;
+      }
+    }
+    warn "Output will be written into the directory: $output_dir\n";
+  }
+  else{
+    $output_dir = '';
+  }
+
+  ### TEMPORARY DIRECTORY for C->T and G->A transcribed files
+  chdir $parent_dir or die "Failed to move back to current working directory\n";
+  if ($temp_dir){
+    warn "\nUsing temp directory: $temp_dir\n";
+    unless ($temp_dir =~ /\/$/){
+      $temp_dir =~ s/$/\//;
+    }
+
+    if (chdir $temp_dir){
+      $temp_dir = getcwd; #  making the path absolute
+      unless ($temp_dir =~ /\/$/){
+	$temp_dir =~ s/$/\//;
+      }
+    }
+    else{
+      mkdir $temp_dir or die "Unable to create directory $temp_dir $!\n";
+      warn "Created temporary directory $temp_dir!\n\n";
+      chdir $temp_dir or die "Failed to move to $temp_dir\n";
+      $temp_dir = getcwd; #  making the path absolute
+      unless ($temp_dir =~ /\/$/){
+	$temp_dir =~ s/$/\//;
+      }
+    }
+    warn "Temporary files will be written into the directory: $temp_dir\n";
+  }
+  else{
+    $temp_dir = '';
+  }
+
+  ### OPTIONAL NON-BS MISMATCH OUTPUT AS EXTRA COLUMN IN SAM FILE
+  if ($non_bs_mm){
+    if ($vanilla){
+      die "Option '--non_bs_mm' may only be specified for output in SAM format. Please respecify!\n";
+    }
+  }
+
+  return ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_format,$bowtie_options,$directional,$unmapped,$multi_map,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$qupto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat);
+}
+### Writes the SAM header to the filehandle OUT: one @HD line, one @SQ line for every entry in
+### %chromosomes (the length is the length of the stored sequence) and one @PG line holding the
+### Bismark version and the command line Bismark was started with.
+sub generate_SAM_header{
+  print OUT "\@HD\tVN:1.0\tSO:unsorted\n";          # @HD = header, VN = version, SO = sort order
+
+  # The names are sorted so that the @SQ lines come out in the same order in every run;
+  # the order in which 'keys' returns the entries of a hash is not stable
+  foreach my $chr (sort keys %chromosomes){
+    my $length = length ($chromosomes{$chr});
+    print OUT "\@SQ\tSN:$chr\tLN:$length\n";        # @SQ = sequence, SN = seq name, LN = length
+  }
+
+  print OUT "\@PG\tID:Bismark\tVN:$bismark_version\tCL:\"bismark $command_line\"\n";        # @PG = program, ID = unique identifier, VN = program version, CL = command line
+}
+### I would like to thank the following individuals for their valuable contributions to the Bismark SAM output format:
+### O. Tam (Sep 2010), C. Whelan (2011), E. Vidal (2011), T. McBryan (2011), P. Hickey (2011)
+sub single_end_SAM_output{
+my ($id,$actual_seq,$methylation_call_params,$qual) = @_;
+my $strand            = $methylation_call_params->{$id}->{alignment_strand};
+my $chr               = $methylation_call_params->{$id}->{chromosome};
+my $start             = $methylation_call_params->{$id}->{position};
+my $stop              = $methylation_call_params->{$id}->{end_position};
+my $ref_seq           = $methylation_call_params->{$id}->{unmodified_genomic_sequence};
+my $methcall          = $methylation_call_params->{$id}->{methylation_call};
+my $read_conversion   = $methylation_call_params->{$id}->{read_conversion};
+my $genome_conversion = $methylation_call_params->{$id}->{genome_conversion};
+my $number_of_mismatches;
+if ($bowtie2){
+$number_of_mismatches= $methylation_call_params->{$id}->{alignment_score};
+}
+else{
+$number_of_mismatches= $methylation_call_params->{$id}->{number_of_mismatches};
+}
+### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
+## FLAG: bitwise FLAG. Each bit is explained in the following table:
+## Bit    Description                                                Comment                                Value
+## 0x1    template has multiple segments in sequencing               0: single-end 1: paired end            value: 2**0 (  1)
+## 0x2    each segment properly aligned according to the aligner     true only for paired-end alignments    value: 2**1 (  2)
+## 0x4    segment unmapped                                           ---                                           ---
+## 0x8    next segment in the template unmapped                      ---                                           ---
+## 0x10   SEQ being reverse complemented                                                                    value: 2**4 ( 16)
+## 0x20   SEQ of the next segment in the template being reversed                                            value: 2**5 ( 32)
+## 0x40   the first segment in the template                          read 1                                 value: 2**6 ( 64)
+## 0x80   the last segment in the template                           read 2                                 value: 2**7 (128)
+## 0x100  secondary alignment                                        ---                                           ---
+## 0x200  not passing quality controls                               ---                                           ---
+## 0x400  PCR or optical duplicate                                   ---                                           ---
+#####
+my $flag;                                                           # FLAG variable used for SAM format.
+if ($strand eq "+"){
+if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){
+$flag = 0;                                                      # 0 for "+" strand (OT)
+}
+elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){
+$flag = 16;                                                     # 16 for "-" strand (CTOB, yields information for the original bottom strand)
+}
+else{
+die "Unexpected strand and read/genome conversion: strand: $strand, read conversion: $read_conversion, genome_conversion: $genome_conversion\n\n";
+}
+}
+elsif ($strand eq "-"){
+if ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){
+$flag = 16;                                                     # 16 for "-" strand (OB)
+}
+elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){
+$flag = 0;                                                      # 0 for "+" strand (CTOT, yields information for the original top strand)
+}
+else{
+die "Unexpected strand and read/genome conversion: strand: $strand, read conversion: $read_conversion, genome_conversion: $genome_conversion\n\n";
+}
+}
+else{
+die "Unexpected strand information: $strand\n\n";
+}
+#####
+my $mapq = 255;                                                     # Assume mapping quality is unavailable
+#####
+my $cigar;
+if ($bowtie2){
+$cigar = $methylation_call_params->{$id}->{CIGAR};                # Actual CIGAR string reported by Bowtie 2
+}
+else{
+$cigar = length($actual_seq) . "M";                               # Bowtie 1 output does not contain indels (only matches and mismatches)
+}
+#####
+my $rnext = "*";                                                    # Paired-end variable
+#####
+my $pnext = 0;                                                      # Paired-end variable
+#####
+my $tlen = 0;                                                       # Paired-end variable
+#####
+if ($read_conversion eq 'CT'){
+$ref_seq = substr($ref_seq, 0, length($ref_seq) - 2);    # Removes additional nucleotides from the 3' end. This only works for the original top or bottom strands
+}
+else{
+$ref_seq = substr($ref_seq, 2, length($ref_seq) - 2);    # Removes additional nucleotides from the 5' end. This works for the complementary strands in non-directional libraries
+}
+if ($strand eq '-'){
+$actual_seq = revcomp($actual_seq);                               # Sequence represented on the forward genomic strand
+$ref_seq = revcomp($ref_seq);                                     # Required for comparison with actual sequence
+$qual = reverse $qual;                                            # if the sequence was reverse-complemented the quality string needs to be reversed as well
+}
+#####
+my $hemming_dist = hemming_dist($actual_seq,$ref_seq);              # Edit distance to the reference, i.e. minimal number of one-nucleotide edits needed to transform the read string
+# into the reference string. hemming_dist()
+if ($bowtie2){
+$hemming_dist += $methylation_call_params->{$id}->{indels};       # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
+}
+my $NM_tag = "NM:i:$hemming_dist";                                  # Optional tag NM: edit distance based on nucleotide differences
+#####
+my $XX_tag = make_mismatch_string($actual_seq, $ref_seq);           # Optional tag XX: string providing mismatched reference bases in the alignment (NO indel information!)
+#####
+my $XM_tag;                                                         # Optional tag XM: Methylation Call String
+if ($strand eq '+'){
+$XM_tag = "XM:Z:$methcall";
+}
+elsif ($strand eq '-'){
+$XM_tag = 'XM:Z:'.reverse $methcall;                              # if the sequence was reverse-complemented the methylation call string needs to be reversed as well
+}
+#####
+my $XR_tag = "XR:Z:$read_conversion";                               # Optional tag XR: Read Conversion
+#####
+my $XG_tag = "XG:Z:$genome_conversion";                             # Optional tag XG: Genome Conversion
+#####
+# Optionally calculating number of mismatches for Bowtie 2 alignments
+if ($non_bs_mm) {
+if ($bowtie2) {
+$number_of_mismatches =~ s/-//; # removing the minus sign
+	### if Bowtie 2 was used we need to analyse the CIGAR string whether the read contained any indels to determine the number of mismatches
+	if ($cigar =~ /(D|I)/) {
+	  # warn "$cigar\n";
+	  # parsing CIGAR string
+	  my @len = split (/\D+/,$cigar); # storing the length per operation
+	  my @ops = split (/\d+/,$cigar); # storing the operation
+	  shift @ops;		# remove the empty first element
+	  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+	  foreach (0..$#len) {
+	    if ($ops[$_] eq 'M') {
+	      # warn "skipping\n";
+	      next;		# irrelevant
+	    }
+	    elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
+	      $number_of_mismatches -= $insertion_open;
+	      $number_of_mismatches -= $len[$_] * $insertion_extend;
+	      # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	    }
+	    elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
+	      $number_of_mismatches -= $deletion_open;
+	      $number_of_mismatches -= $len[$_] * $deletion_extend;
+	      # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	    }
+	    elsif ($cigar =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
+	      die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+	    }
+	    else {
+	      die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+	    }
+	  }
+	  # warn "Alignment score $number_of_mismatches\n";
+	  # print "Mismatches $number_of_mismatches\n\n";
+	}
+### Now we have InDel corrected alignment scores
+### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, but normal mismatches receive -6. This might still break if the
+### sequence contained more than 5 Ns, but this should occur close to never
+my $seq_N_count = $number_of_mismatches % 6; # modulo 6 will return the integer rest after the division
+# warn "N count: $seq_N_count\n";
+$number_of_mismatches = int ($number_of_mismatches / 6) + $seq_N_count;
+# warn "MM    $number_of_mismatches\n";
+}
+}
+####
+my $XA_tag = "XA:Z:$number_of_mismatches";
+#####
+# SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+### optionally print number of non-bisulfite mismatches
+if ($non_bs_mm){
+print OUT join("\t",($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag,$XA_tag)),"\n";
+}
+else{ # default
+# SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+print OUT join("\t",($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag)),"\n";
+}
+}
+sub paired_end_SAM_output{ # Prints the two SAM records (read 1 first, then read 2) of one paired-end alignment to the OUT filehandle
+my ($id,$actual_seq_1,$actual_seq_2,$methylation_call_params,$qual_1,$qual_2) = @_; # read ID, both read sequences, hash ref holding the alignment details stored under that ID, both quality strings
+my $strand_1                = $methylation_call_params->{$id}->{alignment_read_1}; # Bowtie 1 only reports the read 1 alignment strand
+my $strand_2                = $methylation_call_params->{$id}->{alignment_read_2};
+my $chr                     = $methylation_call_params->{$id}->{chromosome};
+my $ref_seq_1               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_1};
+my $ref_seq_2               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_2};
+my $methcall_1              = $methylation_call_params->{$id}->{methylation_call_1};
+my $methcall_2              = $methylation_call_params->{$id}->{methylation_call_2};
+my $read_conversion_1       = $methylation_call_params->{$id}->{read_conversion_1};
+my $read_conversion_2       = $methylation_call_params->{$id}->{read_conversion_2};
+my $genome_conversion       = $methylation_call_params->{$id}->{genome_conversion};
+my $id_1 = $id.'/1'; # QNAME written for read 1
+my $id_2 = $id.'/2'; # QNAME written for read 2
+# The reference may contain any IUPAC (degenerate) nucleotide code in either case; any other character is fatal
+die "Reference sequence ($ref_seq_1) contains invalid nucleotides!\n" if $ref_seq_1 =~ /[^ACTGNRYMKSWBDHV]/i;
+die "Reference sequence ($ref_seq_2) contains invalid nucleotides!\n" if $ref_seq_2 =~ /[^ACTGNRYMKSWBDHV]/i;
+my $index; # used to store the strand origin of the alignment in a less convoluted way (0 = OT, 1 = CTOB, 2 = CTOT, 3 = OB)
+if ($read_conversion_1 eq 'CT' and $genome_conversion eq 'CT'){
+$index = 0; ## this is OT   (original top strand)
+}
+elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'GA'){
+$index = 1; ## this is CTOB (complementary to OB)
+}
+elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'CT'){
+$index = 2; ## this is CTOT (complementary to OT)
+}
+elsif ($read_conversion_1 eq 'CT' and $genome_conversion eq 'GA'){
+$index = 3; ## this is OB   (original bottom)
+}
+else {
+die "Unexpected combination of read 1 and genome conversion: $read_conversion_1 / $genome_conversion\n";
+}
+my $number_of_mismatches_1; # Bowtie 2: alignment score of read 1; Bowtie 1: stored mismatch count (assigned below)
+my $number_of_mismatches_2; # same for read 2
+if ($bowtie2){ # Bowtie 2 reports always as read 1 then read 2, so this is fine
+$number_of_mismatches_1  = $methylation_call_params->{$id}->{alignment_score_1}; # only needed for custom allele-specific output, not the default!
+$number_of_mismatches_2  = $methylation_call_params->{$id}->{alignment_score_2};
+}
+else{ # Bowtie 1 reports always the leftmost read first. That means we have to reverse the strings if the first read aligned in reverse orientation
+if ($index == 2 or $index == 3){ # CTOT or OB
+$number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_2}; # only needed for custom allele-specific output, not the default!
+$number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_1};
+}
+else{ # if the first read aligned in forward direction it is like for Bowtie 2
+$number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_1}; # only needed for custom allele-specific output, not the default!
+$number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_2};
+}
+}
+### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the
+### first or last position. Which end is trimmed depends on the strand origin and is mirrored between read 1 and read 2.
+if ($index == 0 or $index == 3){ # OT or OB
+$ref_seq_1 = substr($ref_seq_1,0,length($ref_seq_1)-2); # drop the last 2 bases
+$ref_seq_2 = substr($ref_seq_2,2,length($ref_seq_2)-2); # drop the first 2 bases
+}
+else{ # CTOT or CTOB
+$ref_seq_1 = substr($ref_seq_1,2,length($ref_seq_1)-2); # drop the first 2 bases
+$ref_seq_2 = substr($ref_seq_2,0,length($ref_seq_2)-2); # drop the last 2 bases
+}
+##### Start position of each read; these become the POS column of the read and the PNEXT column of its mate
+my $start_read_1;
+my $start_read_2;
+# determining start positions
+if ($bowtie2){
+$start_read_1 = $methylation_call_params->{$id}->{position_1};
+$start_read_2 = $methylation_call_params->{$id}->{position_2};
+}
+else{ # Bowtie 1 output. $strand_1 stores the alignment of Read 1
+if ($strand_1 eq '+'){ # Read 1 aligns to the + strand
+$start_read_1 = $methylation_call_params->{$id}->{start_seq_1};
+$start_read_2 = $methylation_call_params->{$id}->{alignment_end} - length ($actual_seq_2) + 1; # derived from the end of the whole alignment and the read length
+}
+else{ # read 1 is on the - strand
+$start_read_1 = $methylation_call_params->{$id}->{alignment_end} - length ($actual_seq_1) + 1; # derived from the end of the whole alignment and the read length
+$start_read_2 = $methylation_call_params->{$id}->{start_seq_1};
+}
+}
+##### End position of each read; only used for the template length (TLEN) calculation further down
+my $end_read_1;
+my $end_read_2;
+# adjusting end positions
+if ($bowtie2){
+$end_read_1 = $methylation_call_params->{$id}->{end_position_1};
+$end_read_2 = $methylation_call_params->{$id}->{end_position_2};
+}
+else{ # Bowtie 1 output. $strand_1 stores the alignment of Read 1
+if ($strand_1 eq '+'){ # Read 1 aligns to the + strand
+$end_read_1 = $methylation_call_params->{$id}->{start_seq_1} + length ($actual_seq_1)-1;
+$end_read_2 = $methylation_call_params->{$id}->{alignment_end};
+}
+else{
+$end_read_1 = $methylation_call_params->{$id}->{alignment_end};
+$end_read_2 = $methylation_call_params->{$id}->{start_seq_1} + length ($actual_seq_2)-1;
+}
+}
+#####
+### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
+## FLAG: bitwise FLAG. Each bit is explained in the following table:
+## Bit    Description                                                Comment                                Value
+## 0x1    template having multiple segments in sequencing            0: single-end 1: paired end            value: 2^^0 (  1)
+## 0x2    each segment properly aligned according to the aligner     true only for paired-end alignments    value: 2^^1 (  2)
+## 0x4    segment unmapped                                           ---                                           ---
+## 0x8    next segment in the template unmapped                      ---                                           ---
+## 0x10   SEQ being reverse complemented                             - strand alignment                     value: 2^^4 ( 16)
+## 0x20   SEQ of the next segment in the template being reversed     + strand alignment                     value: 2^^5 ( 32)
+## 0x40   the first segment in the template                          read 1                                 value: 2^^6 ( 64)
+## 0x80   the last segment in the template                           read 2                                 value: 2^^7 (128)
+## 0x100  secondary alignment                                        ---                                           ---
+## 0x200  not passing quality controls                               ---                                           ---
+## 0x400  PCR or optical duplicate                                   ---                                           ---
+### As the FLAG values do not consider that there might be 4 different bisulfite strands of DNA, we are trying to make FLAG tags which take the strand identity into account
+# strands OT and CTOT will be treated as aligning to the top strand (both sequences are scored as aligning to the top strand)
+# strands OB and CTOB will be treated as aligning to the bottom strand (both sequences are scored as reverse complemented sequences)
+my $flag_1;                                                          # FLAG variable used for SAM format
+my $flag_2;
+if ($index == 0){       # OT
+$flag_1 = 67;                                                      # Read 1 is on the + strand  (1+2+64) (Read 2 is technically reverse-complemented, but we do not score it)
+$flag_2 = 131;                                                     # Read 2 is on - strand but informative for the OT        (1+2+128)
+}
+elsif ($index == 1){    # CTOB
+$flag_1 = 115;                                                     # Read 1 is on the + strand, we score for OB  (1+2+16+32+64)
+$flag_2 = 179;                                                     # Read 2 is on the - strand  (1+2+16+32+128)
+}
+elsif ($index == 2){    # CTOT
+$flag_1 = 67;                                                      # Read 1 is on the - strand (CTOT) strand, but we score it for OT (1+2+64)
+$flag_2 = 131;                                                     # Read 2 is on the + strand, score it for OT (1+2+128)
+}
+elsif ($index == 3){    # OB
+$flag_1 = 115;                                                     # Read 1 is on the - strand, we score for OB  (1+2+16+32+64)
+$flag_2 = 179;                                                     # Read 2 is on the + strand  (1+2+16+32+128)
+}
+#####
+my $mapq = 255;                                                      # Mapping quality is unavailable
+#####
+my $cigar_1;
+my $cigar_2;
+if ($bowtie2){
+$cigar_1 = $methylation_call_params->{$id}->{CIGAR_1};             # Actual CIGAR string reported by Bowtie 2
+$cigar_2 = $methylation_call_params->{$id}->{CIGAR_2};
+}
+else{
+$cigar_1 = length($actual_seq_1) . "M";                            # Assume no indels for Bowtie 1  mapping (only matches and mismatches)
+$cigar_2 = length($actual_seq_2) . "M";
+}
+#####
+my $rnext = '=';                                                     # Chromosome of mate; applies to both reads
+#####
+my $pnext_1 = $start_read_2;                                         # Leftmost position of mate
+my $pnext_2 = $start_read_1;
+#####
+my $tlen_1;                                                          # signed observed Template LENgth (or inferred fragment size)
+my $tlen_2;
+if ($bowtie2){
+if ($start_read_1 <= $start_read_2){
+# Read 1 alignment is leftmost
+if ($end_read_2 >= $end_read_1){
+	# ------------------------->     read 1   reads overlapping
+	#  <-------------------------    read 2
+	#
+	# or
+	#
+	# ------------------------->     read 1
+	#   <-----------------------     read 2   read 2 contained within read 1
+	#
+	# or
+	#
+	# ------------------------->     read 1   reads 1 and 2 exactly overlapping
+	# <-------------------------     read 2
+	#
+	# dovetailing of reads is not enabled for Bowtie 2 alignments
+	$tlen_1 = $end_read_2 - $start_read_1 + 1;                         # Leftmost read has a + sign,
+	$tlen_2 = $start_read_1 - $end_read_2 - 1;                         # Rightmost read has a - sign
+}
+elsif ($end_read_2 < $end_read_1){
+	# ------------------------->     read 1
+	#       <-----------             read 2   read 2 contained within read 1
+	#
+	# or
+	#
+	# ------------------------->     read 1
+	# <-----------                   read 2   read 2 contained within read 1
+	# start and end of read 2  are fully contained within read 1
+	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
+	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
+}
+}
+elsif ($start_read_2 < $start_read_1){
+if ($end_read_1 >= $end_read_2){
+# Read 2 alignment is leftmost
+	# ------------------------->     read 2   reads overlapping
+	#  <-------------------------    read 1
+	#
+	# or
+	#
+	# ------------------------->     read 2
+	#   <-----------------------     read 1   read 1 contained within read 2
+	#
+	#
+	$tlen_2 = $end_read_1 - $start_read_2 + 1;                         # Leftmost read has a + sign,
+	$tlen_1 = $start_read_2 - $end_read_1 - 1;                         # Rightmost read has a - sign
+}
+elsif ($end_read_1 < $end_read_2){
+	# ------------------------->     read 2
+	#       <-----------             read 1   read 1 contained within read 2
+	#
+	# or
+	#
+	# ------------------------->     read 2
+	# <-----------                   read 1   read 1 contained within read 2
+	# start and end of read 1  are fully contained within read 2
+	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
+	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
+}
+}
+}
+else{ # Bowtie 1
+if ($end_read_2 >= $end_read_1){
+# Read 1 alignment is leftmost
+# ------------------------->  read 1
+#  <------------------------- read 2
+# this is the most extreme case for Bowtie 1 alignments, reads do not contain each other, also no dovetailing
+$tlen_1 = $end_read_2 - $start_read_1 + 1;                         # Leftmost read has a + sign,
+$tlen_2 = $start_read_1 - $end_read_2 - 1;                         # Rightmost read has a - sign
+}
+else{
+# Read 2 alignment is leftmost
+# ------------------------->  read 2
+#  <------------------------- read 1
+# this is the most extreme case for Bowtie 1 alignments, reads do not contain each other, also no dovetailing
+$tlen_2 = $end_read_1 - $start_read_2 + 1;                         # Leftmost read has a + sign,
+$tlen_1 = $start_read_2 - $end_read_1 - 1;                         # Rightmost read has a - sign
+}
+}
+#####
+# adjusting the strand of the sequence before we use them to generate mismatch strings (this must happen after the 2 bp trimming of the reference above)
+if ($strand_1 eq '-'){
+$actual_seq_1 = revcomp($actual_seq_1);                            # Sequence represented on the forward genomic strand
+$ref_seq_1 = revcomp($ref_seq_1);                                  # Required for comparison with actual sequence
+$qual_1 = reverse $qual_1;                                         # we need to reverse the quality string as well
+}
+if ($strand_2 eq '-'){
+$actual_seq_2 = revcomp($actual_seq_2);                            # Mate sequence represented on the forward genomic strand
+$ref_seq_2 = revcomp($ref_seq_2);                                  # Required for comparison with actual sequence
+$qual_2 = reverse $qual_2;                                         # If the sequence gets reverse complemented we reverse the quality string as well
+}
+#  print "$actual_seq_1\n$ref_seq_1\n\n";
+#  print "$actual_seq_2\n$ref_seq_2\n\n";
+#####
+my $hemming_dist_1 = hemming_dist($actual_seq_1,$ref_seq_1);         # Minimal number of one-nucleotide edits needed to transform the read string into the reference sequence
+my $hemming_dist_2 = hemming_dist($actual_seq_2,$ref_seq_2);
+if ($bowtie2){
+$hemming_dist_1 += $methylation_call_params->{$id}->{indels_1};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
+$hemming_dist_2 += $methylation_call_params->{$id}->{indels_2};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
+}
+my $NM_tag_1 = "NM:i:$hemming_dist_1";                               # Optional tag NM: edit distance based on nucleotide differences
+my $NM_tag_2 = "NM:i:$hemming_dist_2";                               # Optional tag NM: edit distance based on nucleotide differences
+#####
+my $XX_tag_1 = make_mismatch_string($actual_seq_1,$ref_seq_1);       # Optional tag XX: String providing mismatched reference bases in the alignment (NO indel information!)
+my $XX_tag_2 = make_mismatch_string($actual_seq_2,$ref_seq_2);
+#####
+my $XM_tag_1;                                                        # Optional tag XM: Methylation call string
+my $XM_tag_2;
+if ($strand_1 eq '-'){
+$XM_tag_1 = 'XM:Z:'.reverse $methcall_1;                           # Needs to be reversed if the sequence was reverse complemented
+}
+else{
+$XM_tag_1 = "XM:Z:$methcall_1";
+}
+if ($strand_2 eq '-'){
+$XM_tag_2 = 'XM:Z:'.reverse $methcall_2;                           # Needs to be reversed if the sequence was reverse complemented
+}
+else{
+$XM_tag_2 = "XM:Z:$methcall_2";
+}
+#####
+my $XR_tag_1 = "XR:Z:$read_conversion_1";                            # Optional tag XR: Read 1 conversion state
+my $XR_tag_2 = "XR:Z:$read_conversion_2";                            # Optional tag XR: Read 2 conversion state
+#####
+my $XG_tag = "XG:Z:$genome_conversion";                              # Optional tag XG: Genome Conversion state; valid for both reads
+#####
+# Optionally calculating number of mismatches for Bowtie 2 alignments (for Bowtie 1 the stored mismatch counts are printed unchanged)
+if ($non_bs_mm) {
+if ($bowtie2) {
+$number_of_mismatches_1 =~ s/-//; # removing the minus sign
+$number_of_mismatches_2 =~ s/-//;
+### if Bowtie 2 was used we need to analyse the CIGAR strings whether the reads contained any indels to determine the number of mismatches
+### CIGAR 1
+if ($cigar_1 =~ /(D|I)/) {
+	# warn "$cigar_1\n";
+	# parsing CIGAR string
+	my @len = split (/\D+/,$cigar_1); # storing the length per operation
+	my @ops = split (/\d+/,$cigar_1); # storing the operation
+	shift @ops;		# remove the empty first element
+	die "CIGAR string '$cigar_1' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+	foreach (0..$#len) {
+	  if ($ops[$_] eq 'M') {
+	    # warn "skipping\n";
+	    next;		# irrelevant
+	  }
+	  elsif ($ops[$_] eq 'I') {	# insertion in the read sequence: take the gap open and per-base extension penalties back out of the score
+	    $number_of_mismatches_1 -= $insertion_open;
+	    $number_of_mismatches_1 -= $len[$_] * $insertion_extend;
+	    # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	  }
+	  elsif ($ops[$_] eq 'D') {	# deletion in the read sequence: take the gap open and per-base extension penalties back out of the score
+	    $number_of_mismatches_1 -= $deletion_open;
+	    $number_of_mismatches_1 -= $len[$_] * $deletion_extend;
+	    # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	  }
+	  elsif ($cigar_1 =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die; tr counts the listed characters anywhere in the CIGAR string (the square brackets are taken literally)
+	    die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar_1\n";
+	  }
+	  else {
+	    die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar_1\n";
+	  }
+	}
+	# warn "Alignment score $number_of_mismatches_1\n";
+	# print "Mismatches $number_of_mismatches_1\n\n";
+}
+### CIGAR 2 (same procedure as for CIGAR 1, applied to the score of read 2)
+if ($cigar_2 =~ /(D|I)/) {
+	# warn "$cigar_2\n";
+	# parsing CIGAR string
+	my @len = split (/\D+/,$cigar_2); # storing the length per operation
+	my @ops = split (/\d+/,$cigar_2); # storing the operation
+	shift @ops;		# remove the empty first element
+	die "CIGAR string '$cigar_2' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+	foreach (0..$#len) {
+	  if ($ops[$_] eq 'M') {
+	    # warn "skipping\n";
+	    next; #irrelevant
+	  }
+	  elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
+	    $number_of_mismatches_2 -= $insertion_open;
+	    $number_of_mismatches_2 -= $len[$_] * $insertion_extend;
+	    # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	  }
+	  elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
+	    $number_of_mismatches_2 -= $deletion_open;
+	    $number_of_mismatches_2 -= $len[$_] * $deletion_extend;
+	    # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	  }
+	  elsif ($cigar_2 =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
+	    die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar_2\n";
+	  }
+	  else {
+	    die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar_2\n";
+	  }
+	}
+}
+### Now we have InDel corrected Alignment scores
+### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, but normal mismatches receive -6. This might still break if the
+### sequence contained more than 5 Ns, but this should occur close to never. NOTE(review): the divisor 6 presumably mirrors the mismatch penalty Bowtie 2 is run with - confirm
+my $seq_1_N_count = $number_of_mismatches_1 % 6; # modulo 6 will return the integer rest after the division
+my $seq_2_N_count = $number_of_mismatches_2 % 6;
+#   warn "N count 1: $seq_1_N_count\n";
+#   warn "N count 2: $seq_2_N_count\n";
+$number_of_mismatches_1 = int ($number_of_mismatches_1 / 6) + $seq_1_N_count; # whole multiples of 6 count as mismatches, the remainder as Ns
+$number_of_mismatches_2 = int ($number_of_mismatches_2 / 6) + $seq_2_N_count;
+# warn "MM1    $number_of_mismatches_1 \n";
+# warn "MM2    $number_of_mismatches_2 \n";
+}
+}
+#### XA carries the value for read 1, XB the value for read 2; both are only printed when $non_bs_mm is set
+my $XA_tag = "XA:Z:$number_of_mismatches_1";
+my $XB_tag = "XB:Z:$number_of_mismatches_2";
+# SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+### optionally print number of non-bisulfite mismatches
+if ($non_bs_mm){
+print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$XA_tag)), "\n";
+print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$XB_tag)), "\n";
+}
+else{ # default
+print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
+print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
+}
+}
+sub revcomp{
+    # Returns the reverse complement of a nucleotide sequence.
+    # Only A, C, G and T are complemented; lower-case input bases come back as
+    # upper-case complements. Every other character (N, IUPAC ambiguity codes, ...)
+    # is kept as it is and merely changes position through the reversal.
+    # Dies if no sequence, or a false value such as the empty string, is passed in.
+    my ($sequence) = @_;
+    die "Missing seq to reverse complement\n" unless $sequence;
+    (my $reverse_complement = scalar reverse $sequence) =~ tr/ACTGactg/TGACTGAC/;
+    return $reverse_complement;
+}
+sub hemming_dist{
+    # Returns the number of positions of the read (first argument) whose base
+    # differs from the base at the same position of the reference (second argument).
+    # The comparison is case-sensitive and covers the length of the read only:
+    # reference bases beyond the end of the read are ignored, read positions
+    # beyond the end of the reference are counted as mismatches.
+    my ($actual_seq, $ref_seq) = @_;
+    my @actual_bases = split //, $actual_seq;
+    my @ref_bases    = split //, $ref_seq;
+    my $mismatches   = 0;
+    foreach my $pos (0..$#actual_bases){
+        # Checking the index first keeps us from comparing against an undefined
+        # reference base (and from the 'uninitialized value' warning that comes with it)
+        next if $pos <= $#ref_bases and $actual_bases[$pos] eq $ref_bases[$pos];
+        ++$mismatches;
+    }
+    return $mismatches;
+}
+sub make_mismatch_string{
+    # Builds the optional XX:Z: SAM tag for a read/reference pair of sequences.
+    # The tag alternates the number of matching positions with the reference base
+    # at every mismatching position, e.g. read ACGT vs reference ACCT => XX:Z:2C1.
+    # Adjacent mismatches are written without a number in between. Read positions
+    # beyond the end of the reference contribute no reference base.
+    # Indels are not represented. Dies if either sequence is missing (or false).
+    my $actual_seq = shift or die "Missing actual sequence";
+    my $ref_seq = shift or die "Missing reference sequence";
+    my $XX_tag = "XX:Z:";
+    my $ref_length = length($ref_seq);
+    my $tmp = ($actual_seq ^ $ref_seq);                    # Bitwise string XOR: \0 wherever both sequences agree
+    my $prev_mm_pos = 0;
+    while($tmp =~ /[^\0]/g){                               # Where bitwise comparison showed a difference
+        my $mm_pos = pos($tmp);                            # 1-based position of the current mismatch
+        my $nuc_match = $mm_pos - $prev_mm_pos - 1;        # Number of nucleotides that matched since the last mismatch
+        $XX_tag .= "$nuc_match" if $nuc_match > 0;         # Ignore if mismatches are adjacent to each other
+        # Append the reference nucleotide that differed from the read. The variable is no longer
+        # declared with 'my ... if ...', a construct whose behaviour Perl leaves undefined
+        $XX_tag .= substr($ref_seq, $mm_pos - 1, 1) if $mm_pos <= $ref_length;
+        $prev_mm_pos = $mm_pos;                            # Position of last mismatch
+    }
+    my $end_matches = $ref_length - $prev_mm_pos;          # Provides number of matches from last mismatch till end of sequence
+    $XX_tag .= "$end_matches" if $end_matches > 0;         # Ignore if mismatch is at the end of sequence
+    return $XX_tag;
+}
+# print_helpfile()
+#
+# Prints the licence notice, a description of Bismark, the usage line, all command
+# line options and a description of the output formats to the currently selected
+# output filehandle (STDOUT unless it has been changed with select()).
+# Takes no arguments. The return value is whatever print() returns.
+sub print_helpfile{
+# The help text is purely literal, so a non-interpolating heredoc ('HOW_TO') is used.
+# This way characters such as '@' or '$' in the text can never be mistaken for variables.
+print <<'HOW_TO';
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+DESCRIPTION
+The following is a brief description of command line options and arguments to control the Bismark
+bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the
+reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand
+version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome
+(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
+forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2)
+are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original
+sequence from the genome and determine if there were any protected C's present or not.
+As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be
+re-enabled by using --non_directional.
+The final output of Bismark is in SAM format by default. For Bowtie 1 one can also choose to report the old
+'vanilla' output format, which is a single tab delimited file with all sequences that have a unique best
+alignment to any of the 4 possible strands of a bisulfite PCR product. Both formats are described in more detail below.
+USAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>}
+ARGUMENTS:
+<genome_folder>          The path to the folder containing the unmodified reference genome
+as well as the subfolders created by the Bismark_Genome_Preparation
+script (/Bisulfite_Genome/CT_conversion/ and /Bisulfite_Genome/GA_conversion/).
+Bismark expects one or more fastA files in this folder (file extension: .fa
+or .fasta). The path can be relative or absolute.
+-1 <mates1>              Comma-separated list of files containing the #1 mates (filename usually includes
+"_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
+correspond file-for-file and read-for-read with those specified in <mates2>.
+Reads may be a mix of different lengths. Bismark will produce one mapping result
+and one report file per paired-end input file pair.
+-2 <mates2>              Comma-separated list of files containing the #2 mates (filename usually includes
+"_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
+correspond file-for-file and read-for-read with those specified in <mates1>.
+Reads may be a mix of different lengths.
+<singles>                A comma- or space-separated list of files containing the reads to be aligned (e.g.
+lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will
+produce one mapping result and one report file per input file.
+OPTIONS:
+Input:
+-q/--fastq               The query input files (specified as <mate1>,<mate2> or <singles>) are FASTQ
+files (usually having extension .fq or .fastq). This is the default. See also
+--solexa-quals.
+-f/--fasta               The query input files (specified as <mate1>,<mate2> or <singles>) are FASTA
+files (usually having extension .fa, .mfa, .fna or similar). All quality values
+are assumed to be 40 on the Phred scale. FASTA files are expected to contain both
+the read name and the sequence on a single line (and not spread over several lines).
+-s/--skip <int>          Skip (i.e. do not align) the first <int> reads or read pairs from the input.
+-u/--upto <int>          Only aligns the first <int> reads or read pairs from the input. Default: no limit.
+--phred33-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on.
+--phred64-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.
+--solexa-quals           Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled
+(which can't). The formula for conversion is:
+phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This
+is usually the right option for use with (unconverted) reads emitted by the GA
+Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off.
+--solexa1.3-quals        Same as --phred64-quals. This is usually the right option for use with (unconverted)
+reads emitted by GA Pipeline version 1.3 or later. Default: off.
+--path_to_bowtie         The full path </../../> to the Bowtie (1 or 2) installation on your system. If not
+specified it is assumed that Bowtie (1 or 2) is in the PATH.
+Alignment:
+-n/--seedmms <int>       The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs
+of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the
+default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N).
+-l/--seedlen             The "seed length"; i.e., the number of bases of the high quality end of the read to
+which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for
+larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L).
+-e/--maqerr <int>        Maximum permitted total of quality values at all mismatched read positions throughout
+the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds
+quality values to the nearest 10 and saturates at 30. This value is not relevant for
+Bowtie 2.
+--chunkmbs <int>         The number of megabytes of memory a given thread is given to store path descriptors in
+--best mode. Best-first search must keep track of many paths at once to ensure it is
+always extending the path with the lowest cumulative cost. Bowtie tries to minimize the
+memory impact of the descriptors, but they can still grow very large in some cases. If
+you receive an error message saying that chunk memory has been exhausted in --best mode,
+try adjusting this parameter up to dedicate more memory to the descriptors. This value
+is not relevant for Bowtie 2. Default: 512.
+-I/--minins <int>        The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and
+a paired-end alignment consists of two 20-bp alignments in the appropriate orientation
+with a 20-bp gap between them, that alignment is considered valid (as long as -X is also
+satisfied). A 19-bp gap would not be valid in that case. Default: 0.
+-X/--maxins <int>        The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and
+a paired-end alignment consists of two 20-bp alignments in the proper orientation with a
+60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied).
+A 61-bp gap would not be valid in that case. Default: 500.
+Bowtie 1 Reporting:
+-k <2>                   Due to the way Bismark works Bowtie will report up to 2 valid alignments. This option
+will be used by default.
+--best                   Make Bowtie guarantee that reported singleton alignments are "best" in terms of stratum
+(i.e. number of mismatches, or mismatches in the seed in the case of -n mode) and in
+terms of the quality; e.g. a 1-mismatch alignment where the mismatch position has Phred
+quality 40 is preferred over a 2-mismatch alignment where the mismatched positions both
+have Phred quality 10. When --best is not specified, Bowtie may report alignments that
+are sub-optimal in terms of stratum and/or quality (though an effort is made to report
+the best alignment). --best mode also removes all strand bias. Note that --best does not
+affect which alignments are considered "valid" by Bowtie, only which valid alignments
+are reported by Bowtie. Bowtie is about 1-2.5 times slower when --best is specified.
+Default: on.
+--no_best                Disables the --best option which is on by default. This can speed up the alignment process,
+e.g. for testing purposes, but for credible results it is not recommended to disable --best.
+Output:
+--non_directional        The sequencing library was constructed in a non strand-specific manner, alignments to all four
+bisulfite strands will be reported. Default: OFF.
+(The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
+to the original strands are merely theoretical and should not exist in reality. Specifying directional
+alignments (which is the default) will only run 2 alignment threads to the original top (OT)
+or bottom (OB) strands in parallel and report these alignments. This is the recommended option
+for strand-specific libraries).
+--pbat                   This option may be used for PBAT-Seq libraries (Post-Bisulfite Adapter Tagging; Kobayashi et al.,
+PLoS Genetics, 2012). This is essentially the exact opposite of alignments in 'directional' mode,
+as it will only launch two alignment threads to the CTOT and CTOB strands instead of the normal OT
+and OB ones. Use this option only if you are certain that your libraries were constructed following
+a PBAT protocol (if you don't know what PBAT-Seq is you should not specify this option). The option
+--pbat works only for single-end and paired-end FastQ files for use with Bowtie1 (uncompressed
+temporary files only).
+--sam-no-hd              Suppress SAM header lines (starting with @). This might be useful when very large input files are
+split up into several smaller files to run concurrently and the output files are to be merged.
+--quiet                  Print nothing besides alignments.
+--vanilla                Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead
+of SAM format output.
+-un/--unmapped           Write all reads that could not be aligned to a file in the output directory. Written reads will
+appear as they did in the input, without any translation of quality values that may have
+taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1
+and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and _unmapped_reads_2.txt. Reads
+with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping)
+are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.
+--ambiguous              Write all reads which produce more than one valid alignment with the same number of lowest
+mismatches or other reads that fail to align uniquely to a file in the output directory.
+Written reads will appear as they did in the input, without any of the translation of quality
+values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two
+parallel files with _1 and _2 inserted in their filenames, i.e. _ambiguous_reads_1.txt and
+_ambiguous_reads_2.txt. These reads are not written to the file specified with --un.
+-o/--output_dir <dir>    Write all output files into this directory. By default the output files will be written into
+the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt
+to create it first. The path to the output folder can be either relative or absolute.
+--temp_dir <dir>         Write temporary files to this directory instead of into the same directory as the input files. If
+the specified folder does not exist, Bismark will attempt to create it first. The path to the
+temporary folder can be either relative or absolute.
+--non_bs_mm              Optionally outputs an extra column specifying the number of non-bisulfite mismatches a read had during the
+alignment step. This option is only available for SAM format. In Bowtie 2 context, this value is
+just the number of actual non-bisulfite mismatches and ignores potential insertions or deletions.
+The format for single-end reads and read 1 of paired-end reads is 'XA:Z:number of mismatches'
+and 'XB:Z:number of mismatches' for read 2 of paired-end reads.
+--gzip                   Temporary bisulfite conversion files will be written out in a GZIP compressed form to save disk
+space. This option is available for most alignment modes but is not available for paired-end FastA
+files. This option might be somewhat slower than writing out uncompressed files, but this awaits
+further testing.
+--bam                    The output will be written out in BAM format instead of the default SAM format. Bismark will
+attempt to use the path to Samtools that was specified with '--samtools_path', or, if it hasn't
+been specified, attempt to find Samtools in the PATH. If no installation of Samtools can be found,
+the SAM output will be compressed with GZIP instead (yielding a .sam.gz output file).
+--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
+explicitly if Samtools is in the PATH already.
+Other:
+-h/--help                Displays this help file.
+-v/--version             Displays version information.
+BOWTIE 2 SPECIFIC OPTIONS
+--bowtie2                Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
+alignments, i.e. searches for alignments involving all read characters (also called
+untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
+and/or quality trimmed where appropriate. Default: off.
+Bowtie 2 alignment options:
+-N <int>                 Sets the number of mismatches allowed in a seed alignment during multiseed alignment.
+Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower)
+but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for
+Bowtie 1 see -n).
+-L <int>                 Sets the length of the seed substrings to align during multiseed alignment. Smaller values
+make alignment slower but more sensitive. Default: the --sensitive preset of Bowtie 2 is
+used by default, which sets -L to 20. This option is only available for Bowtie 2 (for
+Bowtie 1 see -l).
+--ignore-quals           When calculating a mismatch penalty, always consider the quality value at the mismatched
+position to be the highest possible, regardless of the actual value. I.e. input is treated
+as though all quality values are high. This is also the default behavior when the input
+doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default.
+Bowtie 2 paired-end options:
+--no-mixed               This option disables Bowtie 2's behavior to try to find alignments for the individual mates if
+it cannot find a concordant or discordant alignment for a pair. This option is invariable and
+on by default.
+--no-discordant          Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.
+A discordant alignment is an alignment where both mates align uniquely, but that does not
+satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior
+and it is on by default.
+Bowtie 2 effort options:
+-D <int>                 Up to <int> consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using
+the alignments found so far. A seed extension "fails" if it does not yield a new best or a
+new second-best alignment. Default: 15.
+-R <int>                 <int> is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.
+When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of
+mismatches allowed) at different offsets and searches for more alignments. A read is considered
+to have repetitive seeds if the total number of seed hits divided by the number of seeds
+that aligned at least once is greater than 300. Default: 2.
+Bowtie 2 parallelization options:
+-p NTHREADS              Launch NTHREADS parallel search threads (default: 1). Threads will run on separate processors/cores
+and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
+parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
+E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
+by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
+library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
+automatically use the option '--reorder', which guarantees that output SAM records are printed in
+an order corresponding to the order of the reads in the original input file, even when -p is set
+greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
+setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
+if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
+correspond to input order in that case.
+Bowtie 2 Scoring options:
+--score_min <func>       Sets a function governing the minimum alignment score needed for an alignment to be considered
+"valid" (i.e. good enough to report). This is a function of read length. For instance, specifying
+L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.
+See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is
+L,0,-0.2.
+--rdg <int1>,<int2>      Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty
+of <int1> + N * <int2>. Default: 5, 3.
+--rfg <int1>,<int2>      Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets
+a penalty of <int1> + N * <int2>. Default: 5, 3.
+Bowtie 2 Reporting options:
+-most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is
+deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the
+default mode, but adjusting the -M setting is deprecated.  Use the -D and -R options to adjust the
+effort expended to find valid alignments.
+For reference, this used to be the old (now deprecated) description of -M:
+Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it
+can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever
+happens first. Only the best alignment is reported. Information from the other alignments is used to
+estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes
+Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that
+aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not
+guarantee that the alignment reported is the best possible in terms of alignment score. -M is
+always used and its default value is set to 10.
+'VANILLA' Bismark  OUTPUT:
+Single-end output format (tab-separated):
+(1) <seq-ID>
+(2) <read alignment strand>
+(3) <chromosome>
+(4) <start position>
+(5) <end position>
+(6) <observed bisulfite sequence>
+(7) <equivalent genomic sequence>
+(8) <methylation call>
+(9) <read conversion>
+(10) <genome conversion>
+(11) <read quality score (Phred33)>
+Paired-end output format (tab-separated):
+(1) <seq-ID>
+(2) <read 1 alignment strand>
+(3) <chromosome>
+(4) <start position>
+(5) <end position>
+(6) <observed bisulfite sequence 1>
+(7) <equivalent genomic sequence 1>
+(8) <methylation call 1>
+(9) <observed bisulfite sequence 2>
+(10) <equivalent genomic sequence 2>
+(11) <methylation call 2>
+(12) <read 1 conversion>
+(13) <genome conversion>
+(14) <read 1 quality score (Phred33)>
+(15) <read 2 quality score (Phred33)>
+Bismark SAM OUTPUT (default):
+(1) QNAME  (seq-ID)
+(2) FLAG   (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))
+(3) RNAME  (chromosome)
+(4) POS    (start position)
+(5) MAPQ   (always 255)
+(6) CIGAR
+(7) RNEXT
+(8) PNEXT
+(9) TLEN
+(10) SEQ
+(11) QUAL   (Phred33 scale)
+(12) NM-tag (edit distance to the reference)
+(13) XX-tag (base-by-base mismatches to the reference. This does not include indels)
+(14) XM-tag (methylation call string)
+(15) XR-tag (read conversion state for the alignment)
+(16) XG-tag (genome conversion state for the alignment)
+(17) XA/XB-tag (non-bisulfite mismatches) (optional!)
+Each read of paired-end alignments is written out in a separate line in the above format.
+Last edited on 10 May 2013.
+HOW_TO
+}

Mercurial > repos > bgruening > bismark

comparison bismark @ 0:62c6da72dd4a draft