0
|
1 #!/usr/bin/perl
|
|
2 # Compute gEUMA for each gene from the gUtable file and insertlendis file and print out only genes whose gEUMA is larger than 0.
|
|
3
|
|
4 if(@ARGV<3) { print "usage: $0 gUtable_file insertlendis_file READLENGTH\n"; exit; }
|
|
5
|
|
6 my ($gUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
|
|
7
|
|
8 open GUTABLE, $gUtable_file or die "Can't open gUtable file $gUtable_file\n";
|
|
9 chomp($header = <GUTABLE>);
|
|
10 @distances = split/\t/,$header;
|
|
11 shift @distances; #'gene'
|
|
12 while(<GUTABLE>){
|
|
13 chomp;
|
|
14 my @line = split/\t/;
|
|
15 my $gene = shift @line;
|
|
16 for my $i (0..$#distances){
|
|
17 $gUtable{$gene}{"$distances[$i]"}=$line[$i];
|
|
18 }
|
|
19 }
|
|
20 close GUTABLE;
|
|
21
|
|
22
|
|
23 my $totalfreq=0;
|
|
24 open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
|
|
25 while(<LENDIS>){
|
|
26 chomp;
|
|
27 my ($insert_size, $freq) = split/\t/;
|
|
28 $insertlendis{"$insert_size"}=$freq;
|
|
29 $totalfreq+=$freq;
|
|
30 }
|
|
31 close LENDIS;
|
|
32
|
|
33 print "gene\tgEUMA\n"; #header
|
|
34 for my $gene (sort keys %gUtable){
|
|
35 my $EUMA=0;
|
|
36 for my $d (keys %{$gUtable{$gene}}){
|
|
37 $pUMA = $gUtable{$gene}{$d} * ($insertlendis{$d}/$totalfreq) ;
|
|
38 $EUMA += $pUMA;
|
|
39 }
|
|
40 if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; } # use >=0.005 instead of >0, because I'm plotting two decimals after point. (in bp)
|
|
41 }
|
|
42
|