32
|
1 #!/usr/bin/perl
|
|
2 use Getopt::Long;
|
|
3 use Cwd;
|
|
4
|
|
5 my $filename;
|
|
6 my $parent_dir = getcwd();
|
|
7
|
|
8 my %fhs;
|
|
9 my %bases_cov;
|
|
10 my %bases_meth;
|
|
11 my ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out) = process_commandline();
|
|
12
|
|
13 process_results_file();
|
|
14
|
|
15 sub process_commandline{
|
|
16 my $cov_cutoff=1;
|
|
17 my $cov_file=0;
|
|
18 my $meth_file=0;
|
|
19 my $context="cpg,chh,chg";
|
|
20 #my $chrom_out;
|
|
21
|
|
22 # Files to extract:
|
|
23 # c -> Coverage
|
|
24 # m -> Methylation
|
|
25 # b -> Coverage + Methylation
|
|
26
|
|
27 my $files_to_extract;
|
|
28 my $command_line = GetOptions ( 'cutoff=i' => \$cov_cutoff,
|
|
29 'e|extract=s' => \$files_to_extract,
|
|
30 'context=s' => \$context,
|
|
31 #'chrom=s' => \$chrom_out,
|
|
32 'cov_out=s' => \$cov_out,
|
|
33 'meth_out=s' => \$meth_out);
|
|
34
|
|
35
|
|
36 ### EXIT ON ERROR if there are errors with any of the supplied options
|
|
37 unless ($command_line){
|
|
38 die "Please respecify command line options\n";
|
|
39 }
|
|
40
|
|
41 ### no files provided
|
|
42 unless (@ARGV){
|
|
43 die "You need to provide a file to parse!\n";
|
|
44 }
|
|
45 $filename = $ARGV[0];
|
|
46
|
|
47 ### no files to extract specified
|
|
48 unless ($files_to_extract){
|
|
49 die "You need to specify the file you want to extract!\n";
|
|
50 }
|
|
51
|
|
52 if ($files_to_extract eq "c" or $files_to_extract eq "b"){
|
|
53 $cov_file=1;
|
|
54 }
|
|
55 if ($files_to_extract eq "m" or $files_to_extract eq "b"){
|
|
56 $meth_file=1;
|
|
57 }
|
|
58 return ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out);
|
|
59 }
|
|
60
|
|
61 sub process_results_file{
|
|
62 %fhs = ();
|
|
63 #if (defined($chrom_out)){
|
|
64 # $chrom_out = "chr".$chrom_out;
|
|
65 # print "\n$chrom_out\n";
|
|
66 #}
|
|
67 warn "\nNow reading in input file $filename\n\n";
|
|
68 open (IN,$filename) or die "Can't open file $filename: $!\n";
|
|
69
|
|
70 my($dir, $output_filename, $extension) = $filename =~ m/(.*\/)(.*)(\..*)$/;
|
|
71
|
|
72 ### OPENING OUTPUT-FILEHANDLES
|
|
73 my $wig_cov = my $wig_meth = $output_filename;
|
|
74 if ($cov_file == 1){
|
|
75 #$wig_cov =~ s/^/Coverage_/;
|
|
76 #$wig_cov = $dir."/".$wig_cov.".wig";
|
|
77 open ($fhs{wig_cov},'>',$cov_out) or die "Failed to write to $cov_out $!\n";
|
|
78 printf {$fhs{wig_cov}} ('track name="'.$output_filename.'" description="coverage per base" visibility=3 type=wiggle_0 color=50,205,50'."\n");
|
|
79 }
|
|
80 if ($meth_file == 1){
|
|
81 $wig_meth =~ s/^/Methylation_/;
|
|
82 $wig_meth = $dir."/".$wig_meth.".wig";
|
|
83 open ($fhs{wig_meth},'>',$meth_out) or die "Failed to write to $meth_out $!\n";
|
|
84 printf {$fhs{wig_meth}} ('track name="'.$output_filename.'" description="percentage methylation" visibility=3 type=wiggle_0 color=50,205,50'."\n");
|
|
85 }
|
|
86
|
|
87
|
|
88 while (<IN>){
|
|
89 next if $. == 1;
|
|
90 my ($chrom,$start,$strand,$cont,$coverage,$percentage_meth) = (split("\t"))[0,1,2,3,4,6];
|
|
91 $chrom = "\L$chrom";
|
|
92 if ("\U$chrom" !~ /CHR.*/){
|
|
93 $chrom = "chr".$chrom;
|
|
94 }
|
|
95 next if $coverage < $cov_cutoff;
|
|
96 #print "\n$chrom:$chrom_out\n";
|
|
97 next if (defined($chrom_out) and ($chrom ne $chrom_out));
|
|
98 #print "\n2\n";
|
|
99 next if not ("\U$context" =~ $cont);
|
|
100 #print "\n3\n";
|
|
101
|
|
102 if (defined($last_chrom) and $last_chrom ne $chrom){
|
|
103 write_files($last_chrom);
|
|
104 }
|
|
105 if ($cov_file == 1){
|
|
106 $bases_cov{$start}=$strand.$coverage;
|
|
107 }
|
|
108
|
|
109 if ($meth_file == 1){
|
|
110 $bases_meth{$start}=$strand.$percentage_meth;
|
|
111 }
|
|
112
|
|
113 $last_chrom = $chrom;
|
|
114 }
|
|
115 write_files($last_chrom);
|
|
116 }
|
|
117
|
|
118
|
|
119
|
|
120 sub write_files(){
|
|
121 my ($chrom) = @_;
|
|
122 # modify chromosome name, if not starting with "chr.."
|
|
123
|
|
124
|
|
125 if ($cov_file == 1){
|
|
126 printf {$fhs{wig_cov}} ("variableStep chrom=".$chrom." span=1\n");
|
|
127 for my $k1 (sort { $a <=> $b } keys %bases_cov){
|
|
128 printf {$fhs{wig_cov}} ("%u\t%d\n",$k1, $bases_cov{$k1});
|
|
129 }
|
|
130 %bases_cov =();
|
|
131 }
|
|
132
|
|
133 if ($meth_file == 1){
|
|
134 printf {$fhs{wig_meth}} ("variableStep chrom=".$chrom." span=1\n");
|
|
135 for my $k1 (sort { $a <=> $b } keys %bases_meth){
|
|
136 printf {$fhs{wig_meth}} ("%u\t%.2f\n",$k1, $bases_meth{$k1});
|
|
137 }
|
|
138 %bases_meth =();
|
|
139 }
|
|
140 }
|
|
141
|
|
142
|
|
143
|