#!/usr/bin/perl
#
# Normalise a per-gene value table by a per-sample total taken from a
# mapping-statistics table, and write the result as a tab-separated file.
#
# Arguments (all six are required, in this order):
#   final.gFVKM_file   tab-separated input; the first line is a header, the
#                      first column is the gene id and the second column is
#                      the value that gets normalised
#   mapping_stat_file  tab-separated; header line, sample name in column 0
#   gene2symbol_file   tab-separated, no header; gene id in column 0 and gene
#                      symbol in column 1
#   factor_col         0-based index of the mapping_stat_file column holding
#                      the per-sample total
#   sample_name        sample whose total is used as the normaliser
#   gLVKM_file         output path (overwritten)
#
# For every input row the script writes:
#   gene id, gene symbol, log2(normalised + 1), normalised, <remaining input columns>
# where normalised = value / (total / 1,000,000).
#
# Dies if an input cannot be opened, if any sample's total is zero, or if
# sample_name does not appear in mapping_stat_file.

use strict;
use warnings;

if (@ARGV < 6) {
    print "usage: $0 final.gFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name gLVKM_file\n";
    exit;
}

my ($gFVKM_file, $mapping_stat_file, $gene2symbol_file,
    $factor_col, $sample_name, $gLVKM_file) = @ARGV;

# Convert a field to a number using Perl's ordinary string-to-number rules
# (missing or non-numeric text counts as 0) without emitting warnings, so the
# zero-total check below keeps catching empty or malformed columns.
sub _as_number {
    my ($text) = @_;
    no warnings qw(numeric uninitialized);
    return $text + 0;
}

# --- per-sample normalisation factors: total / 1e6 -------------------------
my %norm_factor_for;
open my $stat_fh, '<', $mapping_stat_file
    or die "Can't open mapping stat file $mapping_stat_file\n";
<$stat_fh>;    # discard header
while (my $row = <$stat_fh>) {
    chomp $row;
    next if $row !~ /\S/;
    my ($sample, $factor) = (split /\t/, $row)[0, $factor_col];
    my $total = _as_number($factor);
    if ($total == 0) {
        die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n";
    }
    $norm_factor_for{$sample} = $total / 1000000;
}
close $stat_fh;

# --- gene id -> gene symbol -------------------------------------------------
my %symbol_for;
open my $symbol_fh, '<', $gene2symbol_file
    or die "can't open gene2symbol file $gene2symbol_file\n";
while (my $row = <$symbol_fh>) {
    chomp $row;
    # Capture the fields explicitly: a bare split no longer fills @_ on
    # Perl 5.12 and later, which would leave this table empty.
    my ($id, $symbol) = split /\t/, $row;
    next if !defined $id;    # empty line
    $symbol_for{$id} = $symbol;
}
close $symbol_fh;

# --- write the normalised table ---------------------------------------------
open my $out_fh, '>', $gLVKM_file
    or die "Can't write to norm1outfile $gLVKM_file\n";
print {$out_fh} "gene.id\tgene.symbol\tgLVKM\tgFVKM\tfinal.gFVK\torig.gFVK\tderived.gFVK\tgEUMA\t#isoforms\t#measured_isoforms\n";

open my $in_fh, '<', $gFVKM_file
    or die "Can't open gFVKM file $gFVKM_file\n";
<$in_fh>;    # discard header

my $scale = $norm_factor_for{$sample_name};
while (my $row = <$in_fh>) {
    chomp $row;
    next if $row !~ /\S/;

    # Raised on the first data row rather than up front so that an input with
    # no data rows still yields a header-only output file, as before.
    if (!defined $scale) {
        die "Error: sample '$sample_name' not found in mapping stat file $mapping_stat_file\n";
    }

    my ($gene_id, @values) = split /\t/, $row;
    my $gene_symbol = $symbol_for{$gene_id};
    $gene_symbol = '' if !defined $gene_symbol;

    my $normalised = _as_number($values[0]) / $scale;
    my $log2_value = log($normalised + 1) / log(2);

    # Data goes in as %s arguments, never into the format string itself, so a
    # literal '%' in an id, symbol or column cannot corrupt the output.
    printf {$out_fh} "%s\t%s\t%.3f\t%.3f\t%s\n",
        $gene_id, $gene_symbol, $log2_value, $normalised, join("\t", @values);
}
close $in_fh;

# Buffered write errors (e.g. disk full) only surface at close.
close $out_fh or die "Can't write to norm1outfile $gLVKM_file\n";