annotate methylation_analysis_bismark/methylation_analysis/methylation_extractor @ 11:6479112a673b draft

Uploaded
author fcaramia
date Wed, 12 Dec 2012 19:46:06 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1 #!/usr/bin/perl
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2 use warnings;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
3 use strict;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
4 $|++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
5 use Getopt::Long;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
6 use Cwd;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
7
6479112a673b Uploaded
fcaramia
parents:
diff changeset
8 my @filenames;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
9 my %counting;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
10 my $parent_dir = getcwd();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
11
6479112a673b Uploaded
fcaramia
parents:
diff changeset
12 my %fhs;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
13
6479112a673b Uploaded
fcaramia
parents:
diff changeset
14 my $version = 'v0.7.6';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
15 my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla) = process_commandline();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
16
6479112a673b Uploaded
fcaramia
parents:
diff changeset
17 process_Bismark_results_file();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
18
6479112a673b Uploaded
fcaramia
parents:
diff changeset
19 sub process_commandline{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
20 my $help;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
21 my $single_end;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
22 my $paired_end;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
23 my $ignore;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
24 my $genomic_fasta;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
25 my $full;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
26 my $report;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
27 my $extractor_version;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
28 my $no_overlap;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
29 my $merge_non_CpG;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
30 my $vanilla;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
31
6479112a673b Uploaded
fcaramia
parents:
diff changeset
32 my $command_line = GetOptions ('help|man' => \$help,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
33 'p|paired-end' => \$paired_end,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
34 's|single-end' => \$single_end,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
35 'fasta' => \$genomic_fasta,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
36 'ignore=i' => \$ignore,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
37 'comprehensive' => \$full,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
38 'report' => \$report,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
39 'version' => \$extractor_version,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
40 'no_overlap' => \$no_overlap,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
41 'merge_non_CpG' => \$merge_non_CpG,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
42 'vanilla' => \$vanilla,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
43 );
6479112a673b Uploaded
fcaramia
parents:
diff changeset
44
6479112a673b Uploaded
fcaramia
parents:
diff changeset
45 ### EXIT ON ERROR if there were errors with any of the supplied options
6479112a673b Uploaded
fcaramia
parents:
diff changeset
46 unless ($command_line){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
47 die "Please respecify command line options\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
48 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
49
6479112a673b Uploaded
fcaramia
parents:
diff changeset
50 ### HELPFILE
6479112a673b Uploaded
fcaramia
parents:
diff changeset
51 if ($help){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
52 print_helpfile();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
53 exit;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
54 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
55
6479112a673b Uploaded
fcaramia
parents:
diff changeset
56 if ($extractor_version){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
57 print << "VERSION";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
58
6479112a673b Uploaded
fcaramia
parents:
diff changeset
59
6479112a673b Uploaded
fcaramia
parents:
diff changeset
60 Bismark Methylation Extractor
6479112a673b Uploaded
fcaramia
parents:
diff changeset
61
6479112a673b Uploaded
fcaramia
parents:
diff changeset
62 Bismark Extractor Version: $version Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
6479112a673b Uploaded
fcaramia
parents:
diff changeset
63 www.bioinformatics.babraham.ac.uk/projects/bismark/
6479112a673b Uploaded
fcaramia
parents:
diff changeset
64
6479112a673b Uploaded
fcaramia
parents:
diff changeset
65
6479112a673b Uploaded
fcaramia
parents:
diff changeset
66 VERSION
6479112a673b Uploaded
fcaramia
parents:
diff changeset
67 exit;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
68 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
69
6479112a673b Uploaded
fcaramia
parents:
diff changeset
70
6479112a673b Uploaded
fcaramia
parents:
diff changeset
71 ### no files provided
6479112a673b Uploaded
fcaramia
parents:
diff changeset
72 unless (@ARGV){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
73 die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
74 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
75 @filenames = @ARGV;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
76
6479112a673b Uploaded
fcaramia
parents:
diff changeset
77 warn "\n *** Bismark methylation extractor version $version ***\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
78
6479112a673b Uploaded
fcaramia
parents:
diff changeset
79 ### IGNORING <INT> bases at the start of the read when processing the methylation call string
6479112a673b Uploaded
fcaramia
parents:
diff changeset
80 unless ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
81 $ignore = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
82 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
83 ### PRINT A REPORT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
84 unless ($report){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
85 $report = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
86 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
87
6479112a673b Uploaded
fcaramia
parents:
diff changeset
88 ### OLD (VANILLA) OUTPUT FORMAT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
89 unless ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
90 $vanilla = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
91 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
92
6479112a673b Uploaded
fcaramia
parents:
diff changeset
93 if ($single_end){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
94 $paired_end = 0; ### SINGLE END ALIGNMENTS
6479112a673b Uploaded
fcaramia
parents:
diff changeset
95 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
96 elsif ($paired_end){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
97 $single_end = 0; ### PAIRED-END ALIGNMENTS
6479112a673b Uploaded
fcaramia
parents:
diff changeset
98 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
99 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
100 die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
101 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
102
6479112a673b Uploaded
fcaramia
parents:
diff changeset
103 ### NO OVERLAP
6479112a673b Uploaded
fcaramia
parents:
diff changeset
104 if ($no_overlap){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
105 die "The option '--no_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
106 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
107 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
108 $no_overlap = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
109 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
110
6479112a673b Uploaded
fcaramia
parents:
diff changeset
111 ### COMPREHENSIVE OUTPUT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
112 unless ($full){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
113 $full = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
114 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
115
6479112a673b Uploaded
fcaramia
parents:
diff changeset
116 ### MERGE NON-CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
117 unless ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
118 $merge_non_CpG = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
119 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
120
6479112a673b Uploaded
fcaramia
parents:
diff changeset
121 return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
122 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
123
6479112a673b Uploaded
fcaramia
parents:
diff changeset
124
6479112a673b Uploaded
fcaramia
parents:
diff changeset
125 sub process_Bismark_results_file{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
126
6479112a673b Uploaded
fcaramia
parents:
diff changeset
127 if ($single){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
128 if ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
129 warn "Bismark Single-end vanilla format specified\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
130 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
131 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
132 warn "Bismark Single-end SAM format specified (default)\n"; # default
6479112a673b Uploaded
fcaramia
parents:
diff changeset
133 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
134 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
135 elsif ($paired){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
136 if ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
137 warn "Bismark Paired-end vanilla format specified\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
138 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
139 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
140 warn "Bismark Paired-end SAM format specified (default)\n"; # default
6479112a673b Uploaded
fcaramia
parents:
diff changeset
141 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
142 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
143
6479112a673b Uploaded
fcaramia
parents:
diff changeset
144 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
145 warn "First $ignore bases will be disregarded when processing the methylation call string\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
146 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
147
6479112a673b Uploaded
fcaramia
parents:
diff changeset
148 if ($full){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
149 warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
150 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
151 if ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
152 warn "Merge CHG and CHH context to non-CpG context specified\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
153 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
154 warn "\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
155
6479112a673b Uploaded
fcaramia
parents:
diff changeset
156 sleep (3);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
157
6479112a673b Uploaded
fcaramia
parents:
diff changeset
158 foreach my $filename (@filenames){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
159 %fhs = ();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
160 %counting =(
6479112a673b Uploaded
fcaramia
parents:
diff changeset
161 total_meCHG_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
162 total_meCHH_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
163 total_meCpG_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
164 total_unmethylated_CHG_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
165 total_unmethylated_CHH_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
166 total_unmethylated_CpG_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
167 sequences_count => 0,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
168 );
6479112a673b Uploaded
fcaramia
parents:
diff changeset
169 warn "\nNow reading in Bismark result file $filename\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
170
6479112a673b Uploaded
fcaramia
parents:
diff changeset
171 if ($filename =~ /\.gz$/){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
172 open (IN,"zcat $filename |") or die "Can't open gzipped file $filename: $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
173 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
174 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
175 open (IN,$filename) or die "Can't open file $filename: $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
176 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
177
6479112a673b Uploaded
fcaramia
parents:
diff changeset
178 ### Vanilla and SAM output need to read different numbers of header lines
6479112a673b Uploaded
fcaramia
parents:
diff changeset
179 if ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
180 my $bismark_version = <IN>; ## discarding the Bismark version info
6479112a673b Uploaded
fcaramia
parents:
diff changeset
181 chomp $bismark_version;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
182 $bismark_version =~ s/\r//; # replaces \r line feed
6479112a673b Uploaded
fcaramia
parents:
diff changeset
183 $bismark_version =~ s/Bismark version: //;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
184 if ($bismark_version =~ /^\@/){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
185 warn "Detected \@ as the first character of the version information. Is it possible that the file is in SAM format?\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
186 sleep (2);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
187 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
188
6479112a673b Uploaded
fcaramia
parents:
diff changeset
189 unless ($version eq $bismark_version){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
190 die "The methylation extractor and Bismark itself need to be of the same version!\n\nVersions used:\nmethylation extractor: '$version'\nBismark: '$bismark_version'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
191 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
192 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
193 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
194 # If the read is in SAM format (default) it can either start with @ header lines or start with alignments directly.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
195 # We are reading from it further down
6479112a673b Uploaded
fcaramia
parents:
diff changeset
196 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
197
6479112a673b Uploaded
fcaramia
parents:
diff changeset
198 my $output_filename = (split (/\//,$filename))[-1];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
199
6479112a673b Uploaded
fcaramia
parents:
diff changeset
200 ### OPENING OUTPUT-FILEHANDLES
6479112a673b Uploaded
fcaramia
parents:
diff changeset
201 if ($report){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
202 my $report_filename = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
203 $report_filename =~ s/$/_splitting_report.txt/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
204 open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
205 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
206
6479112a673b Uploaded
fcaramia
parents:
diff changeset
207 if ($report){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
208 print REPORT "$output_filename\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
209 print REPORT "Parameters used to extract methylation information:\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
210 if ($paired){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
211 if ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
212 print REPORT "Bismark result file: paired-end (vanilla Bismark format)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
213 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
214 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
215 print REPORT "Bismark result file: paired-end (SAM format)\n"; # default
6479112a673b Uploaded
fcaramia
parents:
diff changeset
216 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
217 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
218
6479112a673b Uploaded
fcaramia
parents:
diff changeset
219 if ($single){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
220 if ($vanilla){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
221 print REPORT "Bismark result file: single-end (vanilla Bismark format)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
222 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
223 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
224 print REPORT "Bismark result file: single-end (SAM format)\n"; # default
6479112a673b Uploaded
fcaramia
parents:
diff changeset
225 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
226 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
227
6479112a673b Uploaded
fcaramia
parents:
diff changeset
228 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
229 print REPORT "Ignoring first $ignore bases\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
230 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
231
6479112a673b Uploaded
fcaramia
parents:
diff changeset
232 if ($full){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
233 print REPORT "Output specified: comprehensive\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
234 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
235 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
236 print REPORT "Output specified: strand-specific (default)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
237 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
238
6479112a673b Uploaded
fcaramia
parents:
diff changeset
239 if ($no_overlap){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
240 print REPORT "No overlapping methylation calls specified\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
241 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
242 if ($genomic_fasta){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
243 print REPORT "Genomic equivalent sequences will be printed out in FastA format\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
244 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
245 if ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
246 print REPORT "Methylation in CHG and CHH context will be merged into \"non-CpG context\" output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
247 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
248
6479112a673b Uploaded
fcaramia
parents:
diff changeset
249 print REPORT "\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
250 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
251
6479112a673b Uploaded
fcaramia
parents:
diff changeset
252 ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
6479112a673b Uploaded
fcaramia
parents:
diff changeset
253
6479112a673b Uploaded
fcaramia
parents:
diff changeset
254 ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
6479112a673b Uploaded
fcaramia
parents:
diff changeset
255 if ($full and $merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
256 my $cpg_output = my $other_c_output = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
257 ### C in CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
258 $cpg_output =~ s/^/CpG_context_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
259 $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
260 open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
261 print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
262 print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
263
6479112a673b Uploaded
fcaramia
parents:
diff changeset
264 ### C in any other context than CpG
6479112a673b Uploaded
fcaramia
parents:
diff changeset
265 $other_c_output =~ s/^/Non_CpG_context_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
266 $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
267 open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
268 print "Writing result file containing methylation information for C in any other context to $other_c_output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
269 print {$fhs{other_context}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
270 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
271
6479112a673b Uploaded
fcaramia
parents:
diff changeset
272 ### if only --merge_non_CpG was specified we will write out 8 different output files, depending on where the (first) unique best alignment has been found
6479112a673b Uploaded
fcaramia
parents:
diff changeset
273 elsif ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
274
6479112a673b Uploaded
fcaramia
parents:
diff changeset
275 my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
276
6479112a673b Uploaded
fcaramia
parents:
diff changeset
277 ### For cytosines in CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
278 $cpg_ot =~ s/^/CpG_OT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
279 $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
280 open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
281 print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
282 print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
283
6479112a673b Uploaded
fcaramia
parents:
diff changeset
284 $cpg_ctot =~ s/^/CpG_CTOT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
285 $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
286 open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
287 print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
288 print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
289
6479112a673b Uploaded
fcaramia
parents:
diff changeset
290 $cpg_ctob =~ s/^/CpG_CTOB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
291 $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
292 open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
293 print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
294 print {$fhs{2}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
295
6479112a673b Uploaded
fcaramia
parents:
diff changeset
296 $cpg_ob =~ s/^/CpG_OB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
297 $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
298 open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
299 print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
300 print {$fhs{3}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
301
6479112a673b Uploaded
fcaramia
parents:
diff changeset
302 ### For cytosines in Non-CpG (CC, CT or CA) context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
303 my $other_c_ot = my $other_c_ctot = my $other_c_ctob = my $other_c_ob = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
304
6479112a673b Uploaded
fcaramia
parents:
diff changeset
305 $other_c_ot =~ s/^/Non_CpG_OT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
306 $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
307 open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
308 print "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
309 print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
310
6479112a673b Uploaded
fcaramia
parents:
diff changeset
311 $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
312 $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
313 open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
314 print "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
315 print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
316
6479112a673b Uploaded
fcaramia
parents:
diff changeset
317 $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
318 $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
319 open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
320 print "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
321 print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
322
6479112a673b Uploaded
fcaramia
parents:
diff changeset
323 $other_c_ob =~ s/^/Non_CpG_OB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
324 $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
325 open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
326 print "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
327 print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
328 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
329
6479112a673b Uploaded
fcaramia
parents:
diff changeset
330 ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
331
6479112a673b Uploaded
fcaramia
parents:
diff changeset
332 ### if --comprehensive was specified we are only writing one file per context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
333 elsif ($full){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
334 my $cpg_output = my $chg_output = my $chh_output = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
335 ### C in CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
336 $cpg_output =~ s/^/CpG_context_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
337 $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
338 open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
339 print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
340 print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
341
6479112a673b Uploaded
fcaramia
parents:
diff changeset
342 ### C in CHG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
343 $chg_output =~ s/^/CHG_context_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
344 $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
345 open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
346 print "Writing result file containing methylation information for C in CHG context to $chg_output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
347 print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
348
6479112a673b Uploaded
fcaramia
parents:
diff changeset
349 ### C in CHH context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
350 $chh_output =~ s/^/CHH_context_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
351 $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
352 open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
353 print "Writing result file containing methylation information for C in CHH context to $chh_output\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
354 print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n"; }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
355
6479112a673b Uploaded
fcaramia
parents:
diff changeset
356 ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
6479112a673b Uploaded
fcaramia
parents:
diff changeset
357 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
358 my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
359
6479112a673b Uploaded
fcaramia
parents:
diff changeset
360 ### For cytosines in CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
361 $cpg_ot =~ s/^/CpG_OT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
362 $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
363 open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
364 print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
365 print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
366
6479112a673b Uploaded
fcaramia
parents:
diff changeset
367 $cpg_ctot =~ s/^/CpG_CTOT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
368 $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
369 open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
370 print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
371 print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
372
6479112a673b Uploaded
fcaramia
parents:
diff changeset
373 $cpg_ctob =~ s/^/CpG_CTOB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
374 $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
375 open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
376 print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
377 print {$fhs{2}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
378
6479112a673b Uploaded
fcaramia
parents:
diff changeset
379 $cpg_ob =~ s/^/CpG_OB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
380 $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
381 open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
382 print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
383 print {$fhs{3}->{CpG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
384
6479112a673b Uploaded
fcaramia
parents:
diff changeset
385 ### For cytosines in CHG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
386 my $chg_ot = my $chg_ctot = my $chg_ctob = my $chg_ob = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
387
6479112a673b Uploaded
fcaramia
parents:
diff changeset
388 $chg_ot =~ s/^/CHG_OT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
389 $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
390 open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
391 print "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
392 print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
393
6479112a673b Uploaded
fcaramia
parents:
diff changeset
394 $chg_ctot =~ s/^/CHG_CTOT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
395 $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
396 open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
397 print "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
398 print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
399
6479112a673b Uploaded
fcaramia
parents:
diff changeset
400 $chg_ctob =~ s/^/CHG_CTOB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
401 $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
402 open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
403 print "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
404 print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
405
6479112a673b Uploaded
fcaramia
parents:
diff changeset
406 $chg_ob =~ s/^/CHG_OB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
407 $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
408 open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
409 print "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
410 print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
411
6479112a673b Uploaded
fcaramia
parents:
diff changeset
412 ### For cytosines in CHH context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
413 my $chh_ot = my $chh_ctot = my $chh_ctob = my $chh_ob = $output_filename;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
414
6479112a673b Uploaded
fcaramia
parents:
diff changeset
415 $chh_ot =~ s/^/CHH_OT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
416 $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
417 open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
418 print "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
419 print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
420
6479112a673b Uploaded
fcaramia
parents:
diff changeset
421 $chh_ctot =~ s/^/CHH_CTOT_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
422 $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
423 open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
424 print "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
425 print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
426
6479112a673b Uploaded
fcaramia
parents:
diff changeset
427 $chh_ctob =~ s/^/CHH_CTOB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
428 $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
429 open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
430 print "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
431 print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
432
6479112a673b Uploaded
fcaramia
parents:
diff changeset
433 $chh_ob =~ s/^/CHH_OB_/;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
434 $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
435 open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
436 print "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
437 print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
438 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
439
6479112a673b Uploaded
fcaramia
parents:
diff changeset
440 my $methylation_call_strings_processed = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
441 my $line_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
442
6479112a673b Uploaded
fcaramia
parents:
diff changeset
443 ### proceeding differently now for single-end or paired-end Bismark files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
444
6479112a673b Uploaded
fcaramia
parents:
diff changeset
445 ### PROCESSING SINGLE-END RESULT FILES
6479112a673b Uploaded
fcaramia
parents:
diff changeset
446 if ($single){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
447
6479112a673b Uploaded
fcaramia
parents:
diff changeset
448 ### also proceeding differently now for SAM format or vanilla Bismark format files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
449 if ($vanilla){ # old vanilla Bismark output format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
450 while (<IN>){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
451 ++$line_count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
452 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
453
6479112a673b Uploaded
fcaramia
parents:
diff changeset
454 ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
455 my ($id,$strand,$chrom,$start,$seq,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,6,7,8,9];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
456
6479112a673b Uploaded
fcaramia
parents:
diff changeset
457 ### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the first or
6479112a673b Uploaded
fcaramia
parents:
diff changeset
458 ### last position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
459 chomp $genome_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
460
6479112a673b Uploaded
fcaramia
parents:
diff changeset
461 my $index;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
462 if ($meth_call){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
463
6479112a673b Uploaded
fcaramia
parents:
diff changeset
464 if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
465 $index = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
466 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
467 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
468 $index = 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
469 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
470 elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
471 $index = 3;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
472 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
473 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
474 $index = 2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
475 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
476 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
477 die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
478 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
479
6479112a673b Uploaded
fcaramia
parents:
diff changeset
480 ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
6479112a673b Uploaded
fcaramia
parents:
diff changeset
481 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
482 $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
483
6479112a673b Uploaded
fcaramia
parents:
diff changeset
484 ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
485 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
486 $start += $ignore;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
487 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
488 elsif ($strand eq '-'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
489 $start += length($meth_call)-1; ## $meth_call is already shortened!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
490 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
491 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
492 die "Alignment did not have proper strand information: $strand\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
493 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
494 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
495 ### printing out the methylation state of every C in the read
6479112a673b Uploaded
fcaramia
parents:
diff changeset
496 print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
497
6479112a673b Uploaded
fcaramia
parents:
diff changeset
498 ++$methylation_call_strings_processed; # 1 per single-end result
6479112a673b Uploaded
fcaramia
parents:
diff changeset
499 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
500 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
501 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
502 else{ # processing single-end SAM format (default)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
503 while (<IN>){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
504 ### SAM format can either start with header lines (starting with @) or start with alignments directly
6479112a673b Uploaded
fcaramia
parents:
diff changeset
505 if (/^\@/){ # skipping header lines (starting with @)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
506 warn "skipping SAM header line:\t$_";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
507 next;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
508 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
509
6479112a673b Uploaded
fcaramia
parents:
diff changeset
510 ++$line_count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
511 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
512
6479112a673b Uploaded
fcaramia
parents:
diff changeset
513 # example read in SAM format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
514 # 1_R1/1 67 5 103172224 255 40M = 103172417 233 AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XX:Z:4T1T24TT7 XM:Z:....h.h........................hh....... XR:Z:CT XG:Z:CT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
515 ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
516
6479112a673b Uploaded
fcaramia
parents:
diff changeset
517 # < 0.7.6 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
518 # < 0.7.6 $meth_call =~ s/^XM:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
519 # < 0.7.6 $read_conversion =~ s/^XR:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
520 # < 0.7.6 $genome_conversion =~ s/^XG:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
521
6479112a673b Uploaded
fcaramia
parents:
diff changeset
522 my ($id,$chrom,$start,$cigar) = (split("\t"))[0,2,3,5];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
523
6479112a673b Uploaded
fcaramia
parents:
diff changeset
524 ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
6479112a673b Uploaded
fcaramia
parents:
diff changeset
525 my $meth_call; ### Thanks to Zachary Zeno for this solution
6479112a673b Uploaded
fcaramia
parents:
diff changeset
526 my $read_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
527 my $genome_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
528
6479112a673b Uploaded
fcaramia
parents:
diff changeset
529 while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
530 my $tag = $1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
531 my $value = $2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
532
6479112a673b Uploaded
fcaramia
parents:
diff changeset
533 if ($tag eq "XM"){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
534 $meth_call = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
535 $meth_call =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
536 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
537 elsif ($tag eq "XR") {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
538 $read_conversion = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
539 $read_conversion =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
540 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
541 elsif ($tag eq "XG") {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
542 $genome_conversion = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
543 $genome_conversion =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
544 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
545 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
546
6479112a673b Uploaded
fcaramia
parents:
diff changeset
547 my $strand;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
548 chomp $genome_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
549 # print "$meth_call\n$read_conversion\n$genome_conversion\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
550
6479112a673b Uploaded
fcaramia
parents:
diff changeset
551 my $index;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
552 if ($meth_call){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
553 if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
554 $index = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
555 $strand = '+';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
556 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
557 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
558 $index = 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
559 $strand = '-';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
560 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
561 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
562 $index = 2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
563 $strand = '+';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
564 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
565 elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
566 $index = 3;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
567 $strand = '-';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
568 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
569 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
570 die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
571 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
572
6479112a673b Uploaded
fcaramia
parents:
diff changeset
573 ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
574 if ($strand eq '-'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
575 $meth_call = reverse $meth_call;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
576 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
577
6479112a673b Uploaded
fcaramia
parents:
diff changeset
578 ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
6479112a673b Uploaded
fcaramia
parents:
diff changeset
579 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
580 # print "\n\n$meth_call\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
581 $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
582 # print "$meth_call\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
583 ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
6479112a673b Uploaded
fcaramia
parents:
diff changeset
584
6479112a673b Uploaded
fcaramia
parents:
diff changeset
585 my @len = split (/\D+/,$cigar); # storing the length per operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
586 my @ops = split (/\d+/,$cigar); # storing the operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
587 shift @ops; # remove the empty first element
6479112a673b Uploaded
fcaramia
parents:
diff changeset
588 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
589
6479112a673b Uploaded
fcaramia
parents:
diff changeset
590 my @comp_cigar; # building an array with all CIGAR operations
6479112a673b Uploaded
fcaramia
parents:
diff changeset
591 foreach my $index (0..$#len){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
592 foreach (1..$len[$index]){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
593 # print "$ops[$index]";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
594 push @comp_cigar, $ops[$index];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
595 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
596 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
597 # print "original CIGAR: $cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
598 # print "original CIGAR: @comp_cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
599
6479112a673b Uploaded
fcaramia
parents:
diff changeset
600 ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
601 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
602
6479112a673b Uploaded
fcaramia
parents:
diff changeset
603 my $D_count = 0; # counting all deletions that affect the ignored genomic position, i.e. Deletions and insertions
6479112a673b Uploaded
fcaramia
parents:
diff changeset
604 my $I_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
605
6479112a673b Uploaded
fcaramia
parents:
diff changeset
606 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
607 my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations from the start
6479112a673b Uploaded
fcaramia
parents:
diff changeset
608 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
609
6479112a673b Uploaded
fcaramia
parents:
diff changeset
610 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
611 $D_count++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
612 $op = shift @comp_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
613 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
614 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
615 if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
616 $I_count++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
617 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
618 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
619 $start += $ignore + $D_count - $I_count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
620 # print "start $start\t ignore: $ignore\t D count: $D_count I_count: $I_count\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
621 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
622 elsif ($strand eq '-'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
623
6479112a673b Uploaded
fcaramia
parents:
diff changeset
624 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
625 my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
6479112a673b Uploaded
fcaramia
parents:
diff changeset
626 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
627 $op = pop @comp_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
628 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
629 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
630
6479112a673b Uploaded
fcaramia
parents:
diff changeset
631 ### For reverse strand alignments we need to determine the number of matching bases (M) or deletions (D) in the read from the CIGAR
6479112a673b Uploaded
fcaramia
parents:
diff changeset
632 ### string to be able to work out the starting position of the read which is on the 3' end of the sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
633 my $MD_count = 0; # counting all operations that affect the genomic position, i.e. M and D. Insertions do not affect the start position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
634 foreach (@comp_cigar){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
635 ++$MD_count if ($_ eq 'M' or $_ eq 'D');
6479112a673b Uploaded
fcaramia
parents:
diff changeset
636 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
637 $start += $MD_count - 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
638 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
639
6479112a673b Uploaded
fcaramia
parents:
diff changeset
640 ### reconstituting shortened CIGAR string
6479112a673b Uploaded
fcaramia
parents:
diff changeset
641 my $new_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
642 my $count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
643 my $last_op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
644 # print "ignore adjusted: @comp_cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
645 foreach my $op (@comp_cigar){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
646 unless (defined $last_op){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
647 $last_op = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
648 ++$count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
649 next;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
650 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
651 if ($last_op eq $op){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
652 ++$count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
653 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
654 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
655 $new_cigar .= "$count$last_op";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
656 $last_op = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
657 $count = 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
658 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
659 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
660 $new_cigar .= "$count$last_op"; # appending the last operation and count
6479112a673b Uploaded
fcaramia
parents:
diff changeset
661 $cigar = $new_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
662 # print "ignore adjusted scalar: $cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
663 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
664 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
665 ### printing out the methylation state of every C in the read
6479112a673b Uploaded
fcaramia
parents:
diff changeset
666 print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index,$cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
667
6479112a673b Uploaded
fcaramia
parents:
diff changeset
668 ++$methylation_call_strings_processed; # 1 per single-end result
6479112a673b Uploaded
fcaramia
parents:
diff changeset
669 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
670 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
671 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
672
6479112a673b Uploaded
fcaramia
parents:
diff changeset
673 ### PROCESSING PAIRED-END RESULT FILES
6479112a673b Uploaded
fcaramia
parents:
diff changeset
674 elsif ($paired){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
675
6479112a673b Uploaded
fcaramia
parents:
diff changeset
676 ### proceeding differently now for SAM format or vanilla Bismark format files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
677 if ($vanilla){ # old vanilla Bismark paired-end output format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
678 while (<IN>){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
679 ++$line_count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
680 warn "processed line: $line_count\n" if ($line_count%500000==0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
681
6479112a673b Uploaded
fcaramia
parents:
diff changeset
682 ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
683 my ($id,$strand,$chrom,$start_read_1,$end_read_2,$seq_1,$meth_call_1,$seq_2,$meth_call_2,$first_read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,4,6,7,9,10,11,12,13];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
684
6479112a673b Uploaded
fcaramia
parents:
diff changeset
685 my $index;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
686 chomp $genome_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
687
6479112a673b Uploaded
fcaramia
parents:
diff changeset
688 if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
689 $index = 0; ## this is OT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
690 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
691 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
692 $index = 2; ## this is CTOB!!!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
693 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
694 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
695 $index = 1; ## this is CTOT!!!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
696 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
697 elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
698 $index = 3; ## this is OB
6479112a673b Uploaded
fcaramia
parents:
diff changeset
699 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
700 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
701 die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
702 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
703
6479112a673b Uploaded
fcaramia
parents:
diff changeset
704 if ($meth_call_1 and $meth_call_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
705 ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
6479112a673b Uploaded
fcaramia
parents:
diff changeset
706 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
707 $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
708 $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
709
6479112a673b Uploaded
fcaramia
parents:
diff changeset
710 ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore' was specified
6479112a673b Uploaded
fcaramia
parents:
diff changeset
711 $start_read_1 += $ignore;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
712 $end_read_2 -= $ignore;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
713 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
714 my $end_read_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
715 my $start_read_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
716
6479112a673b Uploaded
fcaramia
parents:
diff changeset
717 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
718
6479112a673b Uploaded
fcaramia
parents:
diff changeset
719 $end_read_1 = $start_read_1+length($meth_call_1)-1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
720 $start_read_2 = $end_read_2-length($meth_call_2)+1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
721
6479112a673b Uploaded
fcaramia
parents:
diff changeset
722 ## we first pass the first read which is in + orientation on the forward strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
723 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
724
6479112a673b Uploaded
fcaramia
parents:
diff changeset
725 # we next pass the second read which is in - orientation on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
726 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
727 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
728 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
729
6479112a673b Uploaded
fcaramia
parents:
diff changeset
730 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
731
6479112a673b Uploaded
fcaramia
parents:
diff changeset
732 $end_read_1 = $start_read_1+length($meth_call_2)-1; # read 1 is the second reported read!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
733 $start_read_2 = $end_read_2-length($meth_call_1)+1; # read 2 is the first reported read!
6479112a673b Uploaded
fcaramia
parents:
diff changeset
734
6479112a673b Uploaded
fcaramia
parents:
diff changeset
735 ## we first pass the first read which is in - orientation on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
736 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
737
6479112a673b Uploaded
fcaramia
parents:
diff changeset
738 # we next pass the second read which is in + orientation on the forward strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
739 ### if --no_overlap was specified we also pass the end of read 2. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
740 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
741 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
742
6479112a673b Uploaded
fcaramia
parents:
diff changeset
743 $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
6479112a673b Uploaded
fcaramia
parents:
diff changeset
744 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
745 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
746 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
747 else{ # Bismark paired-end SAM output format (default)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
748 while (<IN>){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
749 ### SAM format can either start with header lines (starting with @) or start with alignments directly
6479112a673b Uploaded
fcaramia
parents:
diff changeset
750 if (/^\@/){ # skipping header lines (starting with @)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
751 warn "skipping SAM header line:\t$_";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
752 next;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
753 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
754
6479112a673b Uploaded
fcaramia
parents:
diff changeset
755 ++$line_count;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
756 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
757
6479112a673b Uploaded
fcaramia
parents:
diff changeset
758 # example paired-end reads in SAM format (2 consecutive lines)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
759 # 1_R1/1 67 5 103172224 255 40M = 103172417 233 AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XX:Z:4T1T24TT7 XM:Z:....h.h........................hh....... XR:Z:CT XG:Z:CT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
760 # 1_R1/2 131 5 103172417 255 40M = 103172224 -233 TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:6 XX:Z:T5T1T9T9T7T3 XM:Z:h.....h.h.........h.........h.......h... XR:Z:GA XG:Z:CT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
761
6479112a673b Uploaded
fcaramia
parents:
diff changeset
762 # < version 0.7.6 my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
763
6479112a673b Uploaded
fcaramia
parents:
diff changeset
764 my ($id_1,$chrom,$start_read_1,$cigar_1) = (split("\t"))[0,2,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
6479112a673b Uploaded
fcaramia
parents:
diff changeset
765 my $meth_call_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
766 my $first_read_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
767 my $genome_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
768
6479112a673b Uploaded
fcaramia
parents:
diff changeset
769 while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
770 my $tag = $1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
771 my $value = $2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
772
6479112a673b Uploaded
fcaramia
parents:
diff changeset
773 if ($tag eq "XM"){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
774 $meth_call_1 = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
775 $meth_call_1 =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
776 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
777 elsif ($tag eq "XR") {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
778 $first_read_conversion = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
779 $first_read_conversion =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
780 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
781 elsif ($tag eq "XG") {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
782 $genome_conversion = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
783 $genome_conversion =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
784 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
785 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
786
6479112a673b Uploaded
fcaramia
parents:
diff changeset
787 $_ = <IN>; # reading in the paired read
6479112a673b Uploaded
fcaramia
parents:
diff changeset
788
6479112a673b Uploaded
fcaramia
parents:
diff changeset
789 # < version 0.7.6 my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
790 # < version 0.7.6 $meth_call_1 =~ s/^XM:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
791 # < version 0.7.6 $meth_call_2 =~ s/^XM:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
792 # < version 0.7.6 $first_read_conversion =~ s/^XR:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
793 # < version 0.7.6 $second_read_conversion =~ s/^XR:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
794
6479112a673b Uploaded
fcaramia
parents:
diff changeset
795 my ($id_2,$start_read_2,$cigar_2) = (split("\t"))[0,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
6479112a673b Uploaded
fcaramia
parents:
diff changeset
796
6479112a673b Uploaded
fcaramia
parents:
diff changeset
797 my $meth_call_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
798 my $second_read_conversion;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
799
6479112a673b Uploaded
fcaramia
parents:
diff changeset
800 while ( /(XM|XR):Z:([^\t]+)/g ) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
801 my $tag = $1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
802 my $value = $2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
803
6479112a673b Uploaded
fcaramia
parents:
diff changeset
804 if ($tag eq "XM"){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
805 $meth_call_2 = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
806 $meth_call_2 =~ s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
807 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
808 elsif ($tag eq "XR") {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
809 $second_read_conversion = $value;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
810 $second_read_conversion = s/\r//;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
811 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
812 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
813
6479112a673b Uploaded
fcaramia
parents:
diff changeset
814 # < version 0.7.6 $genome_conversion =~ s/^XG:Z://;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
815 chomp $genome_conversion; # in case it captured a new line character
6479112a673b Uploaded
fcaramia
parents:
diff changeset
816
6479112a673b Uploaded
fcaramia
parents:
diff changeset
817 # print join ("\t",$meth_call_1,$meth_call_2,$first_read_conversion,$second_read_conversion,$genome_conversion),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
818
6479112a673b Uploaded
fcaramia
parents:
diff changeset
819 my $index;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
820 my $strand;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
821
6479112a673b Uploaded
fcaramia
parents:
diff changeset
822 if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
823 $index = 0; ## this is OT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
824 $strand = '+';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
825 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
826 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
827 $index = 1; ## this is CTOT
6479112a673b Uploaded
fcaramia
parents:
diff changeset
828 $strand = '-';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
829 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
830 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
831 $index = 2; ## this is CTOB
6479112a673b Uploaded
fcaramia
parents:
diff changeset
832 $strand = '+';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
833 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
834 elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
835 $index = 3; ## this is OB
6479112a673b Uploaded
fcaramia
parents:
diff changeset
836 $strand = '-';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
837 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
838 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
839 die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
840 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
841
6479112a673b Uploaded
fcaramia
parents:
diff changeset
842 ### reversing the methylation call of the read that was reverse-complemented
6479112a673b Uploaded
fcaramia
parents:
diff changeset
843 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
844 $meth_call_2 = reverse $meth_call_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
845 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
846 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
847 $meth_call_1 = reverse $meth_call_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
848 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
849
6479112a673b Uploaded
fcaramia
parents:
diff changeset
850 if ($meth_call_1 and $meth_call_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
851
6479112a673b Uploaded
fcaramia
parents:
diff changeset
852 my $end_read_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
853
6479112a673b Uploaded
fcaramia
parents:
diff changeset
854 ### READ 1
6479112a673b Uploaded
fcaramia
parents:
diff changeset
855 my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
856 my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
857 shift @ops_1; # remove the empty first element
6479112a673b Uploaded
fcaramia
parents:
diff changeset
858 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
859
6479112a673b Uploaded
fcaramia
parents:
diff changeset
860 my @comp_cigar_1; # building an array with all CIGAR operations
6479112a673b Uploaded
fcaramia
parents:
diff changeset
861 foreach my $index (0..$#len_1){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
862 foreach (1..$len_1[$index]){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
863 # print "$ops_1[$index]";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
864 push @comp_cigar_1, $ops_1[$index];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
865 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
866 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
867 # print "original CIGAR read 1: $cigar_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
868 # print "original CIGAR read 1: @comp_cigar_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
869
6479112a673b Uploaded
fcaramia
parents:
diff changeset
870 ### READ 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
871 my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
872 my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
873 shift @ops_2; # remove the empty first element
6479112a673b Uploaded
fcaramia
parents:
diff changeset
874 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
875 my @comp_cigar_2; # building an array with all CIGAR operations for read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
876 foreach my $index (0..$#len_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
877 foreach (1..$len_2[$index]){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
878 # print "$ops_2[$index]";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
879 push @comp_cigar_2, $ops_2[$index];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
880 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
881 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
882 # print "original CIGAR read 2: $cigar_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
883 # print "original CIGAR read 2: @comp_cigar_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
884
6479112a673b Uploaded
fcaramia
parents:
diff changeset
885 if ($ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
886 ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
6479112a673b Uploaded
fcaramia
parents:
diff changeset
887 ### the methylation calls have already been reversed where necessary
6479112a673b Uploaded
fcaramia
parents:
diff changeset
888 $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
889 $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
890
6479112a673b Uploaded
fcaramia
parents:
diff changeset
891 ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
6479112a673b Uploaded
fcaramia
parents:
diff changeset
892
6479112a673b Uploaded
fcaramia
parents:
diff changeset
893 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
894
6479112a673b Uploaded
fcaramia
parents:
diff changeset
895 ### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
6479112a673b Uploaded
fcaramia
parents:
diff changeset
896 my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1, i.e. Deletions and insertions
6479112a673b Uploaded
fcaramia
parents:
diff changeset
897 my $I_count_1 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
898
6479112a673b Uploaded
fcaramia
parents:
diff changeset
899 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
900 my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
6479112a673b Uploaded
fcaramia
parents:
diff changeset
901 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
902
6479112a673b Uploaded
fcaramia
parents:
diff changeset
903 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
904 $D_count_1++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
905 $op = shift @comp_cigar_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
906 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
907 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
908 if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
909 $I_count_1++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
910 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
911 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
912
6479112a673b Uploaded
fcaramia
parents:
diff changeset
913 $start_read_1 += $ignore + $D_count_1 - $I_count_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
914 # print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tI_count 1: $I_count_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
915
6479112a673b Uploaded
fcaramia
parents:
diff changeset
916 ### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
6479112a673b Uploaded
fcaramia
parents:
diff changeset
917
6479112a673b Uploaded
fcaramia
parents:
diff changeset
918 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
919 my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
6479112a673b Uploaded
fcaramia
parents:
diff changeset
920 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
921 $op = pop @comp_cigar_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
922 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
923 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
924 # the start position of reads mapping to the reverse strand is being adjusted further below
6479112a673b Uploaded
fcaramia
parents:
diff changeset
925 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
926
6479112a673b Uploaded
fcaramia
parents:
diff changeset
927 elsif ($strand eq '-'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
928
6479112a673b Uploaded
fcaramia
parents:
diff changeset
929 ### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
6479112a673b Uploaded
fcaramia
parents:
diff changeset
930 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
931 my $op = pop @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
6479112a673b Uploaded
fcaramia
parents:
diff changeset
932 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
933 $op = pop @comp_cigar_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
934 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
935 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
936 # the start position of reads mapping to the reverse strand is being adjusted further below
6479112a673b Uploaded
fcaramia
parents:
diff changeset
937
6479112a673b Uploaded
fcaramia
parents:
diff changeset
938 ### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
6479112a673b Uploaded
fcaramia
parents:
diff changeset
939 my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
6479112a673b Uploaded
fcaramia
parents:
diff changeset
940 my $I_count_2 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
941
6479112a673b Uploaded
fcaramia
parents:
diff changeset
942 for (1..$ignore){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
943 my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
6479112a673b Uploaded
fcaramia
parents:
diff changeset
944 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
945
6479112a673b Uploaded
fcaramia
parents:
diff changeset
946 while ($op eq 'D'){ # repeating this for deletions (D)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
947 $D_count_2++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
948 $op = shift @comp_cigar_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
949 # print "$_ deleted $op\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
950 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
951 if ($op eq 'I'){ # adjusting the genomic position for insertions (I)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
952 $I_count_2++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
953 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
954 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
955
6479112a673b Uploaded
fcaramia
parents:
diff changeset
956 $start_read_2 += $ignore + $D_count_2 - $I_count_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
957 # print "start read 2 $start_read_2\t ignore: $ignore\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
958
6479112a673b Uploaded
fcaramia
parents:
diff changeset
959 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
960
6479112a673b Uploaded
fcaramia
parents:
diff changeset
961 ### reconstituting shortened CIGAR string 1
6479112a673b Uploaded
fcaramia
parents:
diff changeset
962 my $new_cigar_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
963 my $count_1 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
964 my $last_op_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
965 # print "ignore adjusted CIGAR 1: @comp_cigar_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
966 foreach my $op (@comp_cigar_1){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
967 unless (defined $last_op_1){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
968 $last_op_1 = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
969 ++$count_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
970 next;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
971 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
972 if ($last_op_1 eq $op){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
973 ++$count_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
974 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
975 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
976 $new_cigar_1 .= "$count_1$last_op_1";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
977 $last_op_1 = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
978 $count_1 = 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
979 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
980 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
981 $new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
6479112a673b Uploaded
fcaramia
parents:
diff changeset
982 $cigar_1 = $new_cigar_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
983 # print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
984
6479112a673b Uploaded
fcaramia
parents:
diff changeset
985 ### reconstituting shortened CIGAR string 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
986 my $new_cigar_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
987 my $count_2 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
988 my $last_op_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
989 # print "ignore adjusted CIGAR 2: @comp_cigar_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
990 foreach my $op (@comp_cigar_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
991 unless (defined $last_op_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
992 $last_op_2 = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
993 ++$count_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
994 next;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
995 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
996 if ($last_op_2 eq $op){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
997 ++$count_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
998 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
999 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1000 $new_cigar_2 .= "$count_2$last_op_2";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1001 $last_op_2 = $op;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1002 $count_2 = 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1003 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1004 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1005 $new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1006 $cigar_2 = $new_cigar_2;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1007 # print "ignore adjusted CIGAR 2 scalar: $cigar_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1008
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1009 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1010
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1011 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1012 ### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1013 @comp_cigar_2 = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1014 # print "reverse: @comp_cigar_2\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1015
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1016 my $MD_count_1 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1017 foreach (@comp_cigar_1){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1018 ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1019 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1020
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1021 my $MD_count_2 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1022 foreach (@comp_cigar_2){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1023 ++$MD_count_2 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1024 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1025
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1026 $end_read_1 = $start_read_1 + $MD_count_1 - 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1027 $start_read_2 += $MD_count_2 - 1; ## Passing on the start position on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1028 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1029 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1030 ### adjusting the start position for all reads mapping to the reverse strand, in this case read 1
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1031
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1032 @comp_cigar_1 = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1033 # print "reverse: @comp_cigar_1\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1034
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1035 my $MD_count_1 = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1036 foreach (@comp_cigar_1){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1037 ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1038 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1039
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1040 $end_read_1 = $start_read_1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1041 $start_read_1 += $MD_count_1 - 1; ### Passing on the start position on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1042
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1043 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1044
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1045 if ($strand eq '+'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1046 ## we first pass the first read which is in + orientation on the forward strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1047 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1048
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1049 # we next pass the second read which is in - orientation on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1050 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1051 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1052 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1053
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1054 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1055 ## we first pass the first read which is in - orientation on the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1056 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1057
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1058 # we next pass the second read which is in + orientation on the forward strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1059 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1060 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1061 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1062
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1063 $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1064 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1065 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1066 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1067 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1068 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1069 die "Single-end or paired-end reads not specified properly\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1070 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1071
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1072 print "\n\nProcessed $line_count lines from $filename in total\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1073 print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1074 if ($report){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1075 print REPORT "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1076 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1077 print_splitting_report ();
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1078 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1079 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1080
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1081
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1082 sub print_splitting_report{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1083
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1084 ### Calculating methylation percentages if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1085
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1086 my $percent_meCpG;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1087 if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1088 $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1089 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1090
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1091 my $percent_meCHG;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1092 if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1093 $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1094 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1095
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1096 my $percent_meCHH;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1097 if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1098 $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1099 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1100
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1101 my $percent_non_CpG_methylation;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1102 if ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1103 if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1104 $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1105 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1106 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1107
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1108 if ($report){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1109 ### detailed information about Cs analysed
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1110 print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1111
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1112 my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1113 print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1114
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1115 print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1116 print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1117 print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1118
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1119 print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1120 print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1121 print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1122
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1123 ### calculating methylated CpG percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1124 if ($percent_meCpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1125 print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1126 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1127 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1128 print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1129 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1130
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1131 ### 2-Context Output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1132 if ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1133 if ($percent_non_CpG_methylation){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1134 print REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1135 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1136 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1137 print REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1138 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1139 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1140
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1141 ### 3 Context Output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1142 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1143 ### calculating methylated CHG percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1144 if ($percent_meCHG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1145 print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1146 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1147 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1148 print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1149 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1150
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1151 ### calculating methylated CHH percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1152 if ($percent_meCHH){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1153 print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1154 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1155 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1156 print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1157 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1158 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1159 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1160
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1161 ### detailed information about Cs analysed for on-screen report
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1162 print "Final Cytosine Methylation Report\n",'='x33,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1163
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1164 my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1165 print "Total number of C's analysed:\t$total_number_of_C\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1166
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1167 print "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1168 print "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1169 print "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1170
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1171 print "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1172 print "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1173 print "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1174
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1175 ### printing methylated CpG percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1176 if ($percent_meCpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1177 print "C methylated in CpG context:\t${percent_meCpG}%\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1178 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1179 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1180 print "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1181 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1182
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1183 ### 2-Context Output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1184 if ($merge_non_CpG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1185 if ($percent_non_CpG_methylation){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1186 print "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1187 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1188 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1189 print "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1190 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1191 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1192
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1193 ### 3-Context Output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1194 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1195 ### printing methylated CHG percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1196 if ($percent_meCHG){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1197 print "C methylated in CHG context:\t${percent_meCHG}%\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1198 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1199 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1200 print "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1201 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1202
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1203 ### printing methylated CHH percentage if applicable
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1204 if ($percent_meCHH){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1205 print "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1206 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1207 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1208 print "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1209 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1210 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1211 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1212
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1213
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1214
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1215
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1216
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1217 sub print_individual_C_methylation_states_paired_end_files{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1218
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1219 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar) = @_;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1220 my @methylation_calls = split(//,$meth_call);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1221
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1222 #################################################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1223 ### . for bases not involving cytosines ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1224 ### X for methylated C in CHG context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1225 ### x for not methylated C in CHG context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1226 ### H for methylated C in CHH context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1227 ### h for not methylated C in CHH context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1228 ### Z for methylated C in CpG context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1229 ### z for not methylated C in CpG context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1230 #################################################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1231
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1232 my $methyl_CHG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1233 my $methyl_CHH_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1234 my $methyl_CpG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1235 my $unmethylated_CHG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1236 my $unmethylated_CHH_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1237 my $unmethylated_CpG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1238
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1239 my @len;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1240 my @ops;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1241 my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1242 my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1243 my @comp_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1244
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1245 if ($cigar){ # parsing CIGAR string
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1246 @len = split (/\D+/,$cigar); # storing the length per operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1247 @ops = split (/\d+/,$cigar); # storing the operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1248 shift @ops; # remove the empty first element
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1249 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1250
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1251 foreach my $index (0..$#len){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1252 foreach (1..$len[$index]){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1253 # print "$ops[$index]";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1254 push @comp_cigar, $ops[$index];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1255 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1256 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1257 # warn "\nDetected CIGAR string: $cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1258 # warn "Length of methylation call: ",length $meth_call,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1259 # warn "number of operations: ",scalar @ops,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1260 # warn "number of length digits: ",scalar @len,"\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1261 # print @comp_cigar,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1262 # print "$meth_call\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1263 # sleep (1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1264 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1265
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1266
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1267 if ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1268
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1269 ### the CIGAR string needs to be reversed, the methylation call has already been reversed above
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1270 @comp_cigar = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1271 # print "reverse CIGAR string: @comp_cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1272
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1273 ### the start position of paired-end files has already been corrected, see above
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1274 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1275
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1276 ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1277
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1278 if ($merge_non_CpG) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1279
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1280 if ($no_overlap) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1281
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1282 ### single-file CpG and non-CpG context output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1283 if ($full) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1284 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1285 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1286
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1287 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1288 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1289 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1290 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1291 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1292 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1293
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1294 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1295 if ($start+$index+$pos_offset >= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1296 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1297 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1298
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1299 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1300 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1301 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1302 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1303 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1304 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1305 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1306 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1307 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1308 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1309 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1310 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1311 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1312 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1313 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1314 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1315 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1316 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1317 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1318 elsif ($methylation_calls[$index] eq '.'){}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1319 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1320 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1321 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1322 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1323 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1324 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1325
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1326 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1327 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1328 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1329 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1330 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1331 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1332
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1333 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1334 if ($start-$index+$pos_offset <= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1335 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1336 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1337
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1338 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1339 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1340 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1341 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1342 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1343 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1344 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1345 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1346 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1347 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1348 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1349 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1350 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1351 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1352 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1353 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1354 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1355 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1356 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1357 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1358 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1359 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1360 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1361 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1362 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1363 die "The read orientation was neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1364 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1365 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1366
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1367 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1368 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1369 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1370 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1371
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1372 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1373 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1374 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1375 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1376 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1377 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1378
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1379 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1380 if ($start+$index+$pos_offset >= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1381 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1382 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1383
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1384 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1385 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1386 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1387 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1388 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1389 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1390 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1391 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1392 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1393 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1394 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1395 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1396 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1397 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1398 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1399 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1400 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1401 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1402 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1403 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1404 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1405 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1406 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1407 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1408 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1409 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1410
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1411 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1412 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1413 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1414 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1415 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1416 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1417
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1418 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1419 if ($start-$index+$pos_offset <= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1420 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1421 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1422
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1423 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1424 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1425 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1426 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1427 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1428 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1429 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1430 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1431 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1432 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1433 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1434 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1435 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1436 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1437 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1438 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1439 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1440 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1441 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1442 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1443 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1444 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1445 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1446 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1447 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1448 die "The strand orientation was neither + nor -: '$strand'/n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1449 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1450 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1451 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1452
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1453 ### this is the default paired-end procedure allowing overlaps and using every single C position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1454 ### Still within the 2-CONTEXT ONLY optional section
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1455 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1456 ### single-file CpG and non-CpG context output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1457 if ($full) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1458 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1459 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1460
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1461 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1462 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1463 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1464 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1465 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1466 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1467
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1468 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1469 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1470 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1471 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1472 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1473 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1474 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1475 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1476 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1477 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1478 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1479 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1480 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1481 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1482 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1483 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1484 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1485 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1486 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1487 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1488 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1489 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1490 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1491 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1492 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1493 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1494
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1495 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1496 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1497 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1498 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1499 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1500 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1501
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1502 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1503 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1504 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1505 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1506 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1507 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1508 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1509 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1510 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1511 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1512 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1513 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1514 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1515 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1516 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1517 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1518 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1519 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1520 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1521 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1522 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1523 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1524 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1525 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1526 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1527 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1528 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1529 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1530
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1531 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1532 ### still within the 2-CONTEXT optional section
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1533 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1534 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1535 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1536
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1537 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1538 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1539 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1540 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1541 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1542 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1543
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1544 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1545 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1546 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1547 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1548 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1549 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1550 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1551 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1552 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1553 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1554 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1555 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1556 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1557 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1558 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1559 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1560 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1561 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1562 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1563 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1564 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1565 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1566 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1567 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1568 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1569 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1570
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1571 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1572 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1573 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1574 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1575 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1576 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1577
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1578 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1579 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1580 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1581 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1582 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1583 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1584 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1585 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1586 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1587 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1588 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1589 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1590 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1591 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1592 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1593 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1594 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1595 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1596 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1597 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1598 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1599 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1600 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1601 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1602 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1603 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1604 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1605 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1606 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1607 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1608
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1609 ############################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1610 ### THIS IS THE DEFAULT 3-CONTEXT OUTPUT ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1611 ############################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1612
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1613 elsif ($no_overlap) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1614 ### single-file CpG, CHG and CHH context output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1615 if ($full) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1616 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1617 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1618
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1619 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1620 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1621 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1622 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1623 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1624 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1625
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1626 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1627 if ($start+$index+$pos_offset >= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1628 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1629 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1630
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1631 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1632 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1633 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1634 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1635 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1636 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1637 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1638 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1639 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1640 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1641 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1642 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1643 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1644 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1645 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1646 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1647 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1648 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1649 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1650 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1651 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1652 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1653 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1654 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1655 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1656 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1657
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1658 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1659 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1660 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1661 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1662 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1663 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1664
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1665 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1666 if ($start-$index+$pos_offset <= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1667 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1668 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1669
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1670 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1671 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1672 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1673 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1674 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1675 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1676 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1677 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1678 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1679 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1680 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1681 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1682 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1683 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1684 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1685 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1686 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1687 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1688 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1689 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1690 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1691 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1692 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1693 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1694 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1695 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1696 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1697 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1698
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1699 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1700 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1701 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1702 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1703
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1704 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1705 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1706 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1707 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1708 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1709 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1710
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1711 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1712 if ($start+$index+$pos_offset >= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1713 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1714 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1715
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1716 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1717 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1718 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1719 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1720 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1721 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1722 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1723 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1724 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1725 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1726 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1727 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1728 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1729 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1730 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1731 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1732 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1733 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1734 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1735 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1736 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1737 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1738 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1739 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1740 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1741 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1742
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1743 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1744 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1745 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1746 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1747 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1748 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1749
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1750 ### Returning as soon as the methylation calls start overlapping
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1751 if ($start-$index+$pos_offset <= $end_read_1) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1752 return;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1753 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1754
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1755 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1756 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1757 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1758 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1759 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1760 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1761 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1762 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1763 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1764 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1765 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1766 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1767 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1768 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1769 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1770 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1771 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1772 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1773 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1774 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1775 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1776 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1777 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1778 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1779 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1780 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1781 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1782 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1783 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1784
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1785 ### this is the default paired-end procedure allowing overlaps and using every single C position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1786 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1787 ### single-file CpG, CHG and CHH context output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1788 if ($full) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1789 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1790 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1791
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1792 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1793 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1794 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1795 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1796 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1797 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1798
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1799 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1800 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1801 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1802 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1803 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1804 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1805 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1806 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1807 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1808 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1809 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1810 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1811 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1812 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1813 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1814 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1815 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1816 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1817 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1818 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1819 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1820 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1821 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1822 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1823 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1824 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1825
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1826 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1827 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1828 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1829 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1830 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1831 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1832
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1833 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1834 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1835 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1836 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1837 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1838 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1839 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1840 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1841 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1842 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1843 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1844 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1845 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1846 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1847 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1848 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1849 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1850 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1851 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1852 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1853 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1854 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1855 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1856 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1857 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1858 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1859 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1860 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1861
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1862 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1863 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1864 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1865 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1866
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1867 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1868 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1869 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1870 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1871 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1872 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1873
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1874 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1875 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1876 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1877 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1878 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1879 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1880 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1881 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1882 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1883 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1884 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1885 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1886 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1887 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1888 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1889 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1890 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1891 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1892 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1893 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1894 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1895 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1896 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1897 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1898 } elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1899 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1900
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1901 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1902 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1903 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1904 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1905 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1906 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1907
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1908 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1909 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1910 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1911 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1912 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1913 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1914 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1915 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1916 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1917 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1918 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1919 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1920 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1921 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1922 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1923 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1924 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1925 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1926 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1927 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1928 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1929 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1930 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1931 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1932 } else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1933 die "The strand orientation as neither + nor -: '$strand'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1934 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1935 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1936 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1937 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1938
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1939 sub check_cigar_string {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1940 my ($index,$cigar_offset,$pos_offset,$strand,$comp_cigar) = @_;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1941 # print "$index\t$cigar_offset\t$pos_offset\t$strand\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1942 my ($new_cigar_offset,$new_pos_offset) = (0,0);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1943
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1944 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1945 # print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1946
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1947 if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1948 # warn "position needs no adjustment\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1949 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1950
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1951 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1952 $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1953 # warn "adjusted genomic position by -1 bp (insertion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1954 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1955
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1956 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1957 $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1958 $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1959 # warn "adjusted genomic position by +1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1960
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1961 while ( ($index + $cigar_offset + $new_cigar_offset) < (scalar @$comp_cigar) ){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1962 if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1963 # warn "position needs no adjustment\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1964 last;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1965 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1966 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1967 $new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1968 # warn "adjusted genomic position by another -1 bp (insertion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1969 last;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1970 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1971 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1972 $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1973 $new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1974 # warn "adjusted genomic position by another +1 bp (deletion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1975 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1976 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1977 die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1978 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1979 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1980 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1981 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1982 die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1983 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1984 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1985
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1986 elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1987 # print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1988
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1989 if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1990 # warn "position needs no adjustment\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1991 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1992
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1993 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1994 $new_pos_offset += 1; # we need to add the length of inserted bases to the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1995 # warn "adjusted genomic position by +1 bp (insertion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1996 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1997
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1998 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
1999 $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2000 $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2001 # warn "adjusted genomic position by -1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2002
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2003 while ( ($index + $cigar_offset + $new_cigar_offset) < (scalar @$comp_cigar) ){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2004 if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2005 # warn "Found new 'M' operation; position needs no adjustment\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2006 last;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2007 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2008 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2009 $new_pos_offset += 1; # we need to subtract the length of inserted bases from the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2010 # warn "Found new 'I' operation; adjusted genomic position by another +1 bp (insertion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2011 last;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2012 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2013 elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2014 $new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2015 $new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2016 # warn "adjusted genomic position by another -1 bp (deletion)\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2017 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2018 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2019 die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2020 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2021 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2022 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2023 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2024 die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2025 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2026 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2027 # print "new cigar offset: $new_cigar_offset\tnew pos offset: $new_pos_offset\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2028 return ($new_cigar_offset,$new_pos_offset);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2029 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2030
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2031 sub print_individual_C_methylation_states_single_end{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2032
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2033 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar) = @_;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2034 my @methylation_calls = split(//,$meth_call);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2035
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2036 #################################################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2037 ### . for bases not involving cytosines ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2038 ### X for methylated C in CHG context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2039 ### x for not methylated C in CHG context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2040 ### H for methylated C in CHH context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2041 ### h for not methylated C in CHH context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2042 ### Z for methylated C in CpG context (was protected) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2043 ### z for not methylated C in CpG context (was converted) ###
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2044 #################################################################
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2045
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2046 my $methyl_CHG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2047 my $methyl_CHH_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2048 my $methyl_CpG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2049 my $unmethylated_CHG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2050 my $unmethylated_CHH_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2051 my $unmethylated_CpG_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2052
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2053 my @len;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2054 my @ops;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2055 my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2056 my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2057
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2058 my @comp_cigar;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2059
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2060 if ($cigar){ # parsing CIGAR string
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2061 @len = split (/\D+/,$cigar); # storing the length per operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2062 @ops = split (/\d+/,$cigar); # storing the operation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2063 shift @ops; # remove the empty first element
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2064 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2065
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2066 foreach my $index (0..$#len){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2067 foreach (1..$len[$index]){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2068 # print "$ops[$index]";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2069 push @comp_cigar, $ops[$index];
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2070 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2071 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2072 # warn "\nDetected CIGAR string: $cigar\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2073 # warn "Length of methylation call: ",length $meth_call,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2074 # warn "number of operations: ",scalar @ops,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2075 # warn "number of length digits: ",scalar @len,"\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2076 # print @comp_cigar,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2077 # print "$meth_call\n\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2078 # sleep (1);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2079 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2080
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2081 ### adjusting the start position for all reads mapping to the reverse strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2082 if ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2083
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2084 @comp_cigar = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2085 # print @comp_cigar,"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2086
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2087 unless ($ignore){ ### if --ignore was specified the start position has already been corrected
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2088
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2089 if ($cigar){ ### SAM format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2090 my $MD_count = 0;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2091 foreach (@comp_cigar){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2092 ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2093 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2094 $start += $MD_count - 1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2095 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2096 else{ ### vanilla format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2097 $start += length($meth_call)-1;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2098 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2099 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2100 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2101
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2102 ### THIS IS THE CpG and Non-CpG SECTION (OPTIONAL)
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2103
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2104 ### single-file CpG and other-context output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2105 if ($full and $merge_non_CpG) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2106 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2107 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2108
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2109 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2110 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2111 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2112 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2113 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2114 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2115
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2116 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2117 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2118 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2119 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2120 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2121 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2122 elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2123 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2124 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2125 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2126 elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2127 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2128 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2129 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2130 elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2131 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2132 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2133 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2134 elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2135 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2136 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2137 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2138 elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2139 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2140 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2141 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2142 elsif ($methylation_calls[$index] eq '.') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2143 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2144 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2145 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2146 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2147 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2148 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2149 elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2150
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2151 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2152 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2153 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2154
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2155 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2156 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2157 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2158 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2159 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2160 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2161
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2162 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2163 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2164 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2165 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2166 elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2167 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2168 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2169 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2170 elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2171 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2172 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2173 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2174 elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2175 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2176 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2177 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2178 elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2179 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2180 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2181 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2182 elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2183 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2184 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2185 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2186 elsif ($methylation_calls[$index] eq '.'){
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2187 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2188 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2189 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2190 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2191 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2192 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2193 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2194 die "The strand information was neither + nor -: $strand\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2195 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2196 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2197
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2198 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2199 elsif ($merge_non_CpG) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2200 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2201 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2202 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2203 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2204
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2205 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2206 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2207 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2208 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2209 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2210
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2211 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2212 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2213 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2214 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2215 elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2216 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2217 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2218 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2219 elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2220 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2221 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2222 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2223 elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2224 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2225 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2226 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2227 elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2228 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2229 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2230 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2231 elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2232 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2233 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2234 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2235 elsif ($methylation_calls[$index] eq '.') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2236 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2237 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2238 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2239 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2240 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2241 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2242 elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2243
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2244 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2245 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2246 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2247
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2248 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2249 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2250 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2251 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2252 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2253
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2254 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2255 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2256 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2257 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2258 elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2259 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2260 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2261 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2262 elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2263 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2264 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2265 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2266 elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2267 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2268 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2269 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2270 elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2271 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2272 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2273 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2274 elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2275 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2276 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2277 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2278 elsif ($methylation_calls[$index] eq '.') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2279 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2280 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2281 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2282 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2283 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2284 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2285 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2286 die "The strand information was neither + nor -: $strand\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2287 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2288 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2289
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2290 ### THIS IS THE 3-CONTEXT (CpG, CHG and CHH) DEFAULT SECTION
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2291
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2292 elsif ($full) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2293 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2294 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2295 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2296 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2297
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2298 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2299 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2300 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2301 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2302 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2303
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2304 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2305 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2306 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2307 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2308 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2309 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2310 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2311 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2312 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2313 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2314 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2315 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2316 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2317 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2318 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2319 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2320 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2321 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2322 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2323 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2324 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2325 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2326 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2327 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2328 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2329 elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2330
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2331 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2332 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2333 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2334
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2335 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2336 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2337 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2338 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2339 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2340
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2341 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2342 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2343 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2344 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2345 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2346 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2347 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2348 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2349 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2350 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2351 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2352 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2353 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2354 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2355 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2356 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2357 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2358 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2359 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2360 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2361 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2362 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2363 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2364 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2365 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2366 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2367 die "The read had a strand orientation which was neither + nor -: $strand\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2368 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2369 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2370
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2371 ### strand-specific methylation output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2372 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2373 if ($strand eq '+') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2374 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2375 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2376 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2377
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2378 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2379 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2380 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2381 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2382 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2383
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2384 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2385 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2386 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2387 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2388 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2389 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2390 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2391 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2392 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2393 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2394 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2395 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2396 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2397 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2398 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2399 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2400 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2401 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2402 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2403 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2404 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2405 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2406 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2407 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2408 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2409 elsif ($strand eq '-') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2410
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2411 for my $index (0..$#methylation_calls) {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2412 ### methylated Cs (any context) will receive a forward (+) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2413 ### not methylated Cs (any context) will receive a reverse (-) orientation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2414
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2415 if ($cigar){ # only needed for SAM files
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2416 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2417 $cigar_offset += $cigar_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2418 $pos_offset += $pos_mod;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2419 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2420
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2421 if ($methylation_calls[$index] eq 'X') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2422 $counting{total_meCHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2423 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2424 } elsif ($methylation_calls[$index] eq 'x') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2425 $counting{total_unmethylated_CHG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2426 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2427 } elsif ($methylation_calls[$index] eq 'Z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2428 $counting{total_meCpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2429 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2430 } elsif ($methylation_calls[$index] eq 'z') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2431 $counting{total_unmethylated_CpG_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2432 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2433 } elsif ($methylation_calls[$index] eq 'H') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2434 $counting{total_meCHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2435 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2436 } elsif ($methylation_calls[$index] eq 'h') {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2437 $counting{total_unmethylated_CHH_count}++;
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2438 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2439 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2440 elsif ($methylation_calls[$index] eq '.') {}
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2441 else{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2442 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2443 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2444 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2445 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2446 else {
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2447 die "The strand information was neither + nor -: $strand\n";
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2448 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2449 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2450 }
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2451
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2452 sub print_helpfile{
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2453
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2454 print << 'HOW_TO';
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2455
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2456
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2457 DESCRIPTION
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2458
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2459 The following is a brief description of all options to control the Bismark
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2460 methylation extractor. The script reads in a bisulfite read alignment results file
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2461 produced by the Bismark bisulfite mapper and extracts the methylation information
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2462 for individual cytosines. This information is found in the methylation call field
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2463 which can contain the following characters:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2464
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2465 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2466 ~~~ X for methylated C in CHG context (was protected) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2467 ~~~ x for not methylated C CHG (was converted) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2468 ~~~ H for methylated C in CHH context (was protected) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2469 ~~~ h for not methylated C in CHH context (was converted) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2470 ~~~ Z for methylated C in CpG context (was protected) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2471 ~~~ z for not methylated C in CpG context (was converted) ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2472 ~~~ . for any bases not involving cytosines ~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2473 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2474
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2475 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2476 context (this distinction is actually already made in Bismark itself). As the methylation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2477 information for every C analysed can produce files which easily have tens or even hundreds of
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2478 millions of lines, file sizes can become very large and more difficult to handle. The C
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2479 methylation info additionally splits cytosine methylation calls up into one of the four possible
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2480 strands a given bisulfite read aligned against:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2481
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2482 OT original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2483 CTOT complementary to original top strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2484
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2485 OB original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2486 CTOB complementary to original bottom strand
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2487
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2488 Thus, by default twelve individual output files are being generated per input file (unless
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2489 --comprehensive is specified, see below). The output files can be imported into a genome
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2490 viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2491 unless the bisulfite reads were generated preserving directionality it doesn't make any
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2492 sense to look at the data in a strand-specific manner). Strand-specific output files can
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2493 optionally be skipped, in which case only three output files for CpG, CHG or CHH context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2494 will be generated. For both the strand-specific and comprehensive outputs there is also
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2495 the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2496
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2497
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2498 The output files are in the following format (tab delimited):
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2499
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2500 <sequence_id> <strand> <chromosome> <position> <methylation call>
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2501
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2502
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2503 USAGE: methylation_extractor [options] <filenames>
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2504
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2505
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2506 ARGUMENTS:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2507
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2508 <filenames> A space-separated list of Bismark result files in SAM format from
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2509 which methylation information is extracted for every cytosine in
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2510 the reads. For alignment files in the older custom Bismark output
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2511 see option '--vanilla'.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2512
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2513 OPTIONS:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2514
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2515 -s/--single-end Input file(s) are Bismark result file(s) generated from single-end
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2516 read data. Specifying either --single-end or --paired-end is
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2517 mandatory.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2518
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2519 -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2520 read data. Specifying either --paired-end or --single-end is
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2521 mandatory.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2522
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2523 --vanilla The Bismark result input file(s) are in the old custom Bismark format
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2524 (up to version 0.5.x) and not in SAM format which is the default as
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2525 of Bismark version 0.6.x or higher. Default: OFF.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2526
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2527 --no_overlap For paired-end reads it is theoretically possible that read_1 and
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2528 read_2 overlap. This option avoids scoring overlapping methylation
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2529 calls twice. Whilst this removes a bias towards more methylation calls
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2530 towards the center of sequenced fragments it can de facto remove
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2531 a good proportion of the data.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2532
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2533 --ignore <int> Ignore the first <int> bp at the 5' end of each read when processing the
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2534 methylation call string. This can remove e.g. a restriction enzyme site
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2535 at the start of each read.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2536
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2537 --comprehensive Specifying this option will merge all four possible strand-specific
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2538 methylation info into context-dependent output files. The default
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2539 contexts are:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2540 - CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2541 - CHG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2542 - CHH context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2543
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2544 --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2545 strand-specific output files (default) for Cs in
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2546 - CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2547 - non-CpG context
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2548
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2549 --report Prints out a short methylation summary as well as the paramaters used to run
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2550 this script.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2551
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2552 --version Displays version information.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2553
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2554 -h/--help Displays this help file and exits.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2555
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2556
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2557 OUTPUT:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2558
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2559 The output is in the form:
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2560
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2561 <seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2562
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2563 * Methylated cytosines receive a '+' orientation,
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2564 * Unmethylated cytosines receive a '-' orientation.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2565
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2566
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2567 This script was last modified on 31 July 2012.
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2568
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2569 HOW_TO
6479112a673b Uploaded
fcaramia
parents:
diff changeset
2570 }