Mercurial > repos > chawhwa > neuma
diff NEUMA-1.2.1/normalize_iFVKM_6.pl @ 0:c44c43d185ef draft default tip
NEUMA-1.2.1 Uploaded
author | chawhwa |
---|---|
date | Thu, 08 Aug 2013 00:46:13 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NEUMA-1.2.1/normalize_iFVKM_6.pl Thu Aug 08 00:46:13 2013 -0400 @@ -0,0 +1,49 @@ +#!/usr/bin/perl + +if(@ARGV<6) { print "usage: $0 final.iFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name iLVKM_file\n"; exit; } +my ($iFVKM_file,$mapping_stat_file,$gene2symbol_file,$factor_col,$sample_name,$iLVKM_file) = @ARGV; + +open STAT, $mapping_stat_file or die "Can't open mapping stat file $mapping_stat_file\n"; +<STAT>; #discard header +while(<STAT>){ + chomp; + next if !/\S/; + my ($sample,$factor) = (split/\t/)[0,$factor_col]; + $norm_factor1{$sample} = $factor/1000000; + if($factor==0) { die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n"; } + +} +close STAT; + +open GENE2SYM, $gene2symbol_file or die "can't open gene2symbol file $gene2symbol_file\n"; +while(<GENE2SYM>){ + chomp; + split/\t/; + $gene2symbol{$_[0]}=$_[1]; +} +close GENE2SYM; + + +$"="\t"; +open OUT1, ">$iLVKM_file" or die "Can't write to norm1outfile $iLVKM_file\n"; +print OUT1 "gene.id\tgene.symbol\tisoform.id\tiLVKM\tiFVKM\tfinal.iFVK\torig.iFVK\tiFVKM.factor\tiEUMA\t#isoforms\t#measured_isoforms\n"; + +open IFVKM, $iFVKM_file or die "Can't open iFVKM file $iFVKM_file\n"; +<IFVKM>; #discard header +while(<IFVKM>){ + chomp; + my @line = split/\t/; + my $gene_id = shift @line; + my $isoform_id = shift @line; + my $gene_symbol = $gene2symbol{$gene_id}; + $norm1=$line[0]/$norm_factor1{$sample_name}; + $norm2=log($norm1+1)/log(2); + printf OUT1 "$gene_id\t$gene_symbol\t$isoform_id\t%.3f\t%.3f\t@line\n",$norm2,$norm1,; +} +close IFVKM; + + +close OUT1; + + +