annotate wig_extractor.pl @ 32:117639df067a draft

Uploaded
author eiriche
date Mon, 03 Dec 2012 03:10:23 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
117639df067a Uploaded
eiriche
parents:
diff changeset
1 #!/usr/bin/perl
117639df067a Uploaded
eiriche
parents:
diff changeset
2 use Getopt::Long;
117639df067a Uploaded
eiriche
parents:
diff changeset
3 use Cwd;
117639df067a Uploaded
eiriche
parents:
diff changeset
4
117639df067a Uploaded
eiriche
parents:
diff changeset
5 my $filename;
117639df067a Uploaded
eiriche
parents:
diff changeset
6 my $parent_dir = getcwd();
117639df067a Uploaded
eiriche
parents:
diff changeset
7
117639df067a Uploaded
eiriche
parents:
diff changeset
8 my %fhs;
117639df067a Uploaded
eiriche
parents:
diff changeset
9 my %bases_cov;
117639df067a Uploaded
eiriche
parents:
diff changeset
10 my %bases_meth;
117639df067a Uploaded
eiriche
parents:
diff changeset
11 my ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out) = process_commandline();
117639df067a Uploaded
eiriche
parents:
diff changeset
12
117639df067a Uploaded
eiriche
parents:
diff changeset
13 process_results_file();
117639df067a Uploaded
eiriche
parents:
diff changeset
14
117639df067a Uploaded
eiriche
parents:
diff changeset
15 sub process_commandline{
117639df067a Uploaded
eiriche
parents:
diff changeset
16 my $cov_cutoff=1;
117639df067a Uploaded
eiriche
parents:
diff changeset
17 my $cov_file=0;
117639df067a Uploaded
eiriche
parents:
diff changeset
18 my $meth_file=0;
117639df067a Uploaded
eiriche
parents:
diff changeset
19 my $context="cpg,chh,chg";
117639df067a Uploaded
eiriche
parents:
diff changeset
20 #my $chrom_out;
117639df067a Uploaded
eiriche
parents:
diff changeset
21
117639df067a Uploaded
eiriche
parents:
diff changeset
22 # Files to extract:
117639df067a Uploaded
eiriche
parents:
diff changeset
23 # c -> Coverage
117639df067a Uploaded
eiriche
parents:
diff changeset
24 # m -> Methylation
117639df067a Uploaded
eiriche
parents:
diff changeset
25 # b -> Coverage + Methylation
117639df067a Uploaded
eiriche
parents:
diff changeset
26
117639df067a Uploaded
eiriche
parents:
diff changeset
27 my $files_to_extract;
117639df067a Uploaded
eiriche
parents:
diff changeset
28 my $command_line = GetOptions ( 'cutoff=i' => \$cov_cutoff,
117639df067a Uploaded
eiriche
parents:
diff changeset
29 'e|extract=s' => \$files_to_extract,
117639df067a Uploaded
eiriche
parents:
diff changeset
30 'context=s' => \$context,
117639df067a Uploaded
eiriche
parents:
diff changeset
31 #'chrom=s' => \$chrom_out,
117639df067a Uploaded
eiriche
parents:
diff changeset
32 'cov_out=s' => \$cov_out,
117639df067a Uploaded
eiriche
parents:
diff changeset
33 'meth_out=s' => \$meth_out);
117639df067a Uploaded
eiriche
parents:
diff changeset
34
117639df067a Uploaded
eiriche
parents:
diff changeset
35
117639df067a Uploaded
eiriche
parents:
diff changeset
36 ### EXIT ON ERROR if there are errors with any of the supplied options
117639df067a Uploaded
eiriche
parents:
diff changeset
37 unless ($command_line){
117639df067a Uploaded
eiriche
parents:
diff changeset
38 die "Please respecify command line options\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
39 }
117639df067a Uploaded
eiriche
parents:
diff changeset
40
117639df067a Uploaded
eiriche
parents:
diff changeset
41 ### no files provided
117639df067a Uploaded
eiriche
parents:
diff changeset
42 unless (@ARGV){
117639df067a Uploaded
eiriche
parents:
diff changeset
43 die "You need to provide a file to parse!\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
44 }
117639df067a Uploaded
eiriche
parents:
diff changeset
45 $filename = $ARGV[0];
117639df067a Uploaded
eiriche
parents:
diff changeset
46
117639df067a Uploaded
eiriche
parents:
diff changeset
47 ### no files to extract specified
117639df067a Uploaded
eiriche
parents:
diff changeset
48 unless ($files_to_extract){
117639df067a Uploaded
eiriche
parents:
diff changeset
49 die "You need to specify the file you want to extract!\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
50 }
117639df067a Uploaded
eiriche
parents:
diff changeset
51
117639df067a Uploaded
eiriche
parents:
diff changeset
52 if ($files_to_extract eq "c" or $files_to_extract eq "b"){
117639df067a Uploaded
eiriche
parents:
diff changeset
53 $cov_file=1;
117639df067a Uploaded
eiriche
parents:
diff changeset
54 }
117639df067a Uploaded
eiriche
parents:
diff changeset
55 if ($files_to_extract eq "m" or $files_to_extract eq "b"){
117639df067a Uploaded
eiriche
parents:
diff changeset
56 $meth_file=1;
117639df067a Uploaded
eiriche
parents:
diff changeset
57 }
117639df067a Uploaded
eiriche
parents:
diff changeset
58 return ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out);
117639df067a Uploaded
eiriche
parents:
diff changeset
59 }
117639df067a Uploaded
eiriche
parents:
diff changeset
60
117639df067a Uploaded
eiriche
parents:
diff changeset
61 sub process_results_file{
117639df067a Uploaded
eiriche
parents:
diff changeset
62 %fhs = ();
117639df067a Uploaded
eiriche
parents:
diff changeset
63 #if (defined($chrom_out)){
117639df067a Uploaded
eiriche
parents:
diff changeset
64 # $chrom_out = "chr".$chrom_out;
117639df067a Uploaded
eiriche
parents:
diff changeset
65 # print "\n$chrom_out\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
66 #}
117639df067a Uploaded
eiriche
parents:
diff changeset
67 warn "\nNow reading in input file $filename\n\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
68 open (IN,$filename) or die "Can't open file $filename: $!\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
69
117639df067a Uploaded
eiriche
parents:
diff changeset
70 my($dir, $output_filename, $extension) = $filename =~ m/(.*\/)(.*)(\..*)$/;
117639df067a Uploaded
eiriche
parents:
diff changeset
71
117639df067a Uploaded
eiriche
parents:
diff changeset
72 ### OPENING OUTPUT-FILEHANDLES
117639df067a Uploaded
eiriche
parents:
diff changeset
73 my $wig_cov = my $wig_meth = $output_filename;
117639df067a Uploaded
eiriche
parents:
diff changeset
74 if ($cov_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
75 #$wig_cov =~ s/^/Coverage_/;
117639df067a Uploaded
eiriche
parents:
diff changeset
76 #$wig_cov = $dir."/".$wig_cov.".wig";
117639df067a Uploaded
eiriche
parents:
diff changeset
77 open ($fhs{wig_cov},'>',$cov_out) or die "Failed to write to $cov_out $!\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
78 printf {$fhs{wig_cov}} ('track name="'.$output_filename.'" description="coverage per base" visibility=3 type=wiggle_0 color=50,205,50'."\n");
117639df067a Uploaded
eiriche
parents:
diff changeset
79 }
117639df067a Uploaded
eiriche
parents:
diff changeset
80 if ($meth_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
81 $wig_meth =~ s/^/Methylation_/;
117639df067a Uploaded
eiriche
parents:
diff changeset
82 $wig_meth = $dir."/".$wig_meth.".wig";
117639df067a Uploaded
eiriche
parents:
diff changeset
83 open ($fhs{wig_meth},'>',$meth_out) or die "Failed to write to $meth_out $!\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
84 printf {$fhs{wig_meth}} ('track name="'.$output_filename.'" description="percentage methylation" visibility=3 type=wiggle_0 color=50,205,50'."\n");
117639df067a Uploaded
eiriche
parents:
diff changeset
85 }
117639df067a Uploaded
eiriche
parents:
diff changeset
86
117639df067a Uploaded
eiriche
parents:
diff changeset
87
117639df067a Uploaded
eiriche
parents:
diff changeset
88 while (<IN>){
117639df067a Uploaded
eiriche
parents:
diff changeset
89 next if $. == 1;
117639df067a Uploaded
eiriche
parents:
diff changeset
90 my ($chrom,$start,$strand,$cont,$coverage,$percentage_meth) = (split("\t"))[0,1,2,3,4,6];
117639df067a Uploaded
eiriche
parents:
diff changeset
91 $chrom = "\L$chrom";
117639df067a Uploaded
eiriche
parents:
diff changeset
92 if ("\U$chrom" !~ /CHR.*/){
117639df067a Uploaded
eiriche
parents:
diff changeset
93 $chrom = "chr".$chrom;
117639df067a Uploaded
eiriche
parents:
diff changeset
94 }
117639df067a Uploaded
eiriche
parents:
diff changeset
95 next if $coverage < $cov_cutoff;
117639df067a Uploaded
eiriche
parents:
diff changeset
96 #print "\n$chrom:$chrom_out\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
97 next if (defined($chrom_out) and ($chrom ne $chrom_out));
117639df067a Uploaded
eiriche
parents:
diff changeset
98 #print "\n2\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
99 next if not ("\U$context" =~ $cont);
117639df067a Uploaded
eiriche
parents:
diff changeset
100 #print "\n3\n";
117639df067a Uploaded
eiriche
parents:
diff changeset
101
117639df067a Uploaded
eiriche
parents:
diff changeset
102 if (defined($last_chrom) and $last_chrom ne $chrom){
117639df067a Uploaded
eiriche
parents:
diff changeset
103 write_files($last_chrom);
117639df067a Uploaded
eiriche
parents:
diff changeset
104 }
117639df067a Uploaded
eiriche
parents:
diff changeset
105 if ($cov_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
106 $bases_cov{$start}=$strand.$coverage;
117639df067a Uploaded
eiriche
parents:
diff changeset
107 }
117639df067a Uploaded
eiriche
parents:
diff changeset
108
117639df067a Uploaded
eiriche
parents:
diff changeset
109 if ($meth_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
110 $bases_meth{$start}=$strand.$percentage_meth;
117639df067a Uploaded
eiriche
parents:
diff changeset
111 }
117639df067a Uploaded
eiriche
parents:
diff changeset
112
117639df067a Uploaded
eiriche
parents:
diff changeset
113 $last_chrom = $chrom;
117639df067a Uploaded
eiriche
parents:
diff changeset
114 }
117639df067a Uploaded
eiriche
parents:
diff changeset
115 write_files($last_chrom);
117639df067a Uploaded
eiriche
parents:
diff changeset
116 }
117639df067a Uploaded
eiriche
parents:
diff changeset
117
117639df067a Uploaded
eiriche
parents:
diff changeset
118
117639df067a Uploaded
eiriche
parents:
diff changeset
119
117639df067a Uploaded
eiriche
parents:
diff changeset
120 sub write_files(){
117639df067a Uploaded
eiriche
parents:
diff changeset
121 my ($chrom) = @_;
117639df067a Uploaded
eiriche
parents:
diff changeset
122 # modify chromosome name, if not starting with "chr.."
117639df067a Uploaded
eiriche
parents:
diff changeset
123
117639df067a Uploaded
eiriche
parents:
diff changeset
124
117639df067a Uploaded
eiriche
parents:
diff changeset
125 if ($cov_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
126 printf {$fhs{wig_cov}} ("variableStep chrom=".$chrom." span=1\n");
117639df067a Uploaded
eiriche
parents:
diff changeset
127 for my $k1 (sort { $a <=> $b } keys %bases_cov){
117639df067a Uploaded
eiriche
parents:
diff changeset
128 printf {$fhs{wig_cov}} ("%u\t%d\n",$k1, $bases_cov{$k1});
117639df067a Uploaded
eiriche
parents:
diff changeset
129 }
117639df067a Uploaded
eiriche
parents:
diff changeset
130 %bases_cov =();
117639df067a Uploaded
eiriche
parents:
diff changeset
131 }
117639df067a Uploaded
eiriche
parents:
diff changeset
132
117639df067a Uploaded
eiriche
parents:
diff changeset
133 if ($meth_file == 1){
117639df067a Uploaded
eiriche
parents:
diff changeset
134 printf {$fhs{wig_meth}} ("variableStep chrom=".$chrom." span=1\n");
117639df067a Uploaded
eiriche
parents:
diff changeset
135 for my $k1 (sort { $a <=> $b } keys %bases_meth){
117639df067a Uploaded
eiriche
parents:
diff changeset
136 printf {$fhs{wig_meth}} ("%u\t%.2f\n",$k1, $bases_meth{$k1});
117639df067a Uploaded
eiriche
parents:
diff changeset
137 }
117639df067a Uploaded
eiriche
parents:
diff changeset
138 %bases_meth =();
117639df067a Uploaded
eiriche
parents:
diff changeset
139 }
117639df067a Uploaded
eiriche
parents:
diff changeset
140 }
117639df067a Uploaded
eiriche
parents:
diff changeset
141
117639df067a Uploaded
eiriche
parents:
diff changeset
142
117639df067a Uploaded
eiriche
parents:
diff changeset
143