diff NEUMA-1.2.1/normalize_gFVKM_5.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/normalize_gFVKM_5.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/perl
+
+if(@ARGV<6) { print "usage: $0 final.gFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name gLVKM_file\n"; exit; }
+
+my ($gFVKM_file,$mapping_stat_file,$gene2symbol_file,$factor_col,$sample_name,$gLVKM_file) = @ARGV;
+
+open STAT, $mapping_stat_file or die "Can't open mapping stat file $mapping_stat_file\n";
+<STAT>; ##discard header
+while(<STAT>){
+  chomp;
+  next if !/\S/;
+  my ($sample,$factor) = (split/\t/)[0,$factor_col];
+  $norm_factor1{$sample} = $factor/1000000;
+  if($factor==0) { die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n"; }
+}
+close STAT;
+
+
+open GENE2SYM, $gene2symbol_file or die "can't open gene2symbol file $gene2symbol_file\n";
+while(<GENE2SYM>){
+ chomp;
+ split/\t/;
+ $gene2symbol{$_[0]}=$_[1];
+}
+close GENE2SYM;
+
+
+$"="\t";
+open OUT1, ">$gLVKM_file" or die "Can't write to norm1outfile $gLVKM_file\n";
+print OUT1 "gene.id\tgene.symbol\tgLVKM\tgFVKM\tfinal.gFVK\torig.gFVK\tderived.gFVK\tgEUMA\t#isoforms\t#measured_isoforms\n";
+
+open GFVKM, $gFVKM_file or die "Can't open gFVKM file $gFVKM_file\n";
+<GFVKM>;  #discard header
+while(<GFVKM>){
+  chomp;
+  my @line = split/\t/;
+  my $gene_id = shift @line;
+  my $gene_symbol = $gene2symbol{$gene_id};
+  $norm1=$line[0]/$norm_factor1{$sample_name};
+  $norm2=log($norm1+1)/log(2);
+  printf OUT1 "$gene_id\t$gene_symbol\t%.3f\t%.3f\t@line\n",$norm2,$norm1,;
+}
+close GFVKM;
+
+
+close OUT1;
+
+
+