annotate NEUMA-1.2.1/normalize_iFVKM_6.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
1 #!/usr/bin/perl
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
2
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
3 if(@ARGV<6) { print "usage: $0 final.iFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name iLVKM_file\n"; exit; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
4 my ($iFVKM_file,$mapping_stat_file,$gene2symbol_file,$factor_col,$sample_name,$iLVKM_file) = @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
5
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
6 open STAT, $mapping_stat_file or die "Can't open mapping stat file $mapping_stat_file\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
7 <STAT>; #discard header
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
8 while(<STAT>){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
9 chomp;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
10 next if !/\S/;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
11 my ($sample,$factor) = (split/\t/)[0,$factor_col];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
12 $norm_factor1{$sample} = $factor/1000000;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
13 if($factor==0) { die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n"; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
14
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
15 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
16 close STAT;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
17
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
18 open GENE2SYM, $gene2symbol_file or die "can't open gene2symbol file $gene2symbol_file\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
19 while(<GENE2SYM>){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
20 chomp;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
21 split/\t/;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
22 $gene2symbol{$_[0]}=$_[1];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
23 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
24 close GENE2SYM;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
25
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
26
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
27 $"="\t";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
28 open OUT1, ">$iLVKM_file" or die "Can't write to norm1outfile $iLVKM_file\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
29 print OUT1 "gene.id\tgene.symbol\tisoform.id\tiLVKM\tiFVKM\tfinal.iFVK\torig.iFVK\tiFVKM.factor\tiEUMA\t#isoforms\t#measured_isoforms\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
30
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
31 open IFVKM, $iFVKM_file or die "Can't open iFVKM file $iFVKM_file\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
32 <IFVKM>; #discard header
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
33 while(<IFVKM>){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
34 chomp;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
35 my @line = split/\t/;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
36 my $gene_id = shift @line;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
37 my $isoform_id = shift @line;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
38 my $gene_symbol = $gene2symbol{$gene_id};
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
39 $norm1=$line[0]/$norm_factor1{$sample_name};
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
40 $norm2=log($norm1+1)/log(2);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
41 printf OUT1 "$gene_id\t$gene_symbol\t$isoform_id\t%.3f\t%.3f\t@line\n",$norm2,$norm1,;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
42 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
43 close IFVKM;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
44
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
45
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
46 close OUT1;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
47
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
48
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
49