Mercurial > repos > chawhwa > neuma
view NEUMA-1.2.1/combine_gFVKM_iFVKM_max.pl @ 0:c44c43d185ef draft default tip
NEUMA-1.2.1 Uploaded
author | chawhwa |
---|---|
date | Thu, 08 Aug 2013 00:46:13 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl #This script was adopted and modified from ``scripts/combine_gFVKM_iFVKM_5.pl`. if(@ARGV<4) { print "usage: $0 gFVKM_file iFVKM_file gene2NM_file EUMA_CUT(eg.50, in bp) output.final.gFVKM output.final.iFVKM\n"; exit; } my ($gFVKM_file, $iFVKM_file, $gene2NM_file, $CUT, $final_gFVKM_file, $final_iFVKM_file ) = @ARGV; my %gFVKM=(); my %gEUMA=(); my %allgenes=(); &read_gFVKM($gFVKM_file,\%gFVKM,\%gEUMA,\%allgenes); my %iFVKM=(); my %iEUMA=(); my %Alltranscirpts=(); &read_iFVKM($iFVKM_file,\%iFVKM,\%iEUMA,\%Allisoforms); my %gene2NM=(); &read_gene2NM($gene2NM_file,\%gene2NM); my %derived_gFVKM = (); &compute_derived_gFVKM(\%derived_gFVKM,\%iFVKM,\%allgenes); my %final_gFVKM=(); &compute_final_gFVKM(\%allgenes,\%gFVKM,\%derived_gFVKM,\%final_gFVKM); my %final_iFVKM=(); my %iFVKM_factor=(); &compute_final_iFVKM(\%final_iFVKM,\%iFVKM,\%final_gFVKM,\%iFVKM_factor,\%Allisoforms) &generate_final_gFVKM_file($final_gFVKM_file,\%allgenes,\%gFVKM,\%derived_gFVKM,\%gEUMA,\%iFVKM,\%gene2NM,\%final_gFVKM); &generate_final_iFVKM_file($final_iFVKM_file,\%iFVKM,\%final_iFVKM,\%iFVKM_factor,\%iEUMA,\%gene2NM,\%Allisoforms); ################# ## subroutines ## ################# sub read_gFVKM { my $gFVKM_file = shift @_; my $pGFVKM = shift @_; my $pgEUMA = shift @_; my $pAllgenes = shift @_; open IN, $gFVKM_file or die "Can't open gFVKM file $gFVKM_file\n"; while(<IN>){ chomp; my ($gene,$gFVKM_0,$gEUMA_0) = (split/\t/)[0,1,3]; next if $gEUMA_0 < $CUT; $pGFVKM->{$gene} = $gFVKM_0; $pgEUMA->{$gene} = $gEUMA_0; $pAllgenes->{$gene}=1; } close IN; } sub read_iFVKM { my $iFVKM_file = shift @_; my $pIFVKM = shift @_; my $piEUMA = shift @_; my $pAllisoforms = shift @_; open IN, $iFVKM_file or die "Can't open iFVKM file $iFVKM_file\n"; while(<IN>){ chomp; my ($gid,$isoform,$iFVKM_0,$iEUMA_0) = (split/\t/)[0,1,2,4]; next if $iEUMA_0 < $CUT; $pIFVKM->{$gid}{$isoform} = $iFVKM_0; $piEUMA->{$gid}{$isoform} = $iEUMA_0; $pAllisoforms->{$gid}{$isoform} = 1; } close IN; } sub read_gene2NM { my $gene2NM_file = shift @_; my $pGene2NM = shift @_; open IN, $gene2NM_file or die "Can't open gene2NM file $gene2NM_file\n"; while(<IN>){ chomp; my ($gid,$isoform) = split/\t/; $pGene2NM->{$gid}{$isoform}=1; } close IN; } sub compute_derived_gFVKM { my $pDerived_gFVKM = shift @_; my $pIFVKM = shift @_; my $pAllgenes = shift @_; #compute derived gFVKM for my $gene (keys %$pIFVKM){ my $sumiFVKM = &sum(values(%{$pIFVKM->{$gene}})); $pDerived_gFVKM->{$gene} = $sumiFVKM; $pAllgenes->{$gene}=1; } } sub compute_final_iFVKM { my $pFinal_iFVKM = shift @_; my $pIFVKM = shift @_; my $pFinalGFVKM = shift @_; my $pIFVKM_factor = shift @_; my $pAllisoforms = shift @_; #rescale iFVKM based on final gFVKM for my $gene (keys %$pIFVKM){ my $sumiFVKM = &sum(values %{$pIFVKM->{$gene}}); if($sumiFVKM == 0) { $pIFVKM_factor->{$gene} = "inf"; } else { $pIFVKM_factor->{$gene} = $pFinalGFVKM->{$gene} / $sumiFVKM; } for my $isoform (keys %{$pIFVKM->{$gene}}) { $pFinal_iFVKM->{$gene} = $pIFVKM->{$gene}; $pAllisoforms->{$gene}{$isoform} =1; } } } # using max of gFVKM and sumiFVKM, instead of average. sub compute_final_gFVKM { my $pAllgenes = shift @_; my $pGFVKM = shift @_; my $pDerived_gFVKM = shift @_; my $pFinal_gFVKM = shift @_; for my $gene (sort keys %$pAllgenes){ my $orig_gFVKM = (exists $pGFVKM->{$gene})?$pGFVKM->{$gene}:'NA'; my $derived_gFVKM_val = (exists $pDerived_gFVKM->{$gene})?$pDerived_gFVKM->{$gene}:'NA'; my $final_gFVKM = 'NA'; if($orig_gFVKM ne 'NA' && $derived_gFVKM_val ne 'NA'){ #$final_gFVKM = ($orig_gFVKM+$derived_gFVKM_val)/2; #average $final_gFVKM = ($orig_gFVKM>=$derived_gFVKM_val?$orig_gFVKM:$derived_gFVKM_val); #maximum } elsif($orig_gFVKM eq 'NA') { $final_gFVKM = $derived_gFVKM_val; } elsif($derived_gFVKM_val eq 'NA') { $final_gFVKM = $orig_gFVKM; } $pFinal_gFVKM->{$gene} = $final_gFVKM; } } sub generate_final_gFVKM_file { my $final_gFVKM_file = shift @_; my $pAllgenes = shift @_; my $pGFVKM = shift @_; my $pDerived_gFVKM = shift @_; my $pgEUMA = shift @_; my $pIFVKM = shift @_; my $pGene2NM = shift @_; my $pFinal_gFVKM = shift @_; open FG_FVKM, ">$final_gFVKM_file" or die "Can't write to final gFVKM file $final_gFVKM_file\n"; print FG_FVKM "gene\tfinal.gFVK\torig.gFVK\tderived_gFVK\tgEUMA\t#isoforms\t#measured_isoforms\n"; for my $gene (sort keys %$pAllgenes){ my $orig_gFVKM = (exists $pGFVKM->{$gene})?$pGFVKM->{$gene}:'NA'; my $derived_gFVKM = (exists $pDerived_gFVKM->{$gene})?$pDerived_gFVKM->{$gene}:'NA'; my $final_gFVKM = (exists $pFinal_gFVKM->{$gene})?$pFinal_gFVKM->{$gene}:'NA'; my $gEUMA=''; if(exists $pgEUMA->{$gene}){ $gEUMA = $pgEUMA->{$gene}; } else { $gEUMA = 'below_cut'; } my $nTr = scalar keys %{$pGene2NM->{$gene}}; my $nTr_measured = scalar keys %{$pIFVKM->{$gene}}; print FG_FVKM "$gene\t$final_gFVKM\t$orig_gFVKM\t$derived_gFVKM\t$gEUMA\t$nTr\t$nTr_measured\n"; } close FG_FVKM; } sub generate_final_iFVKM_file { my $final_iFVKM_file = shift @_; my $pIFVKM = shift @_; my $pFinal_iFVKM = shift @_; my $pIFVKM_factor = shift @_; my $piEUMA = shift @_; my $pGene2NM = shift @_; my $pAllisoforms = shift @_; #print scalar keys %$pDerived_iFVKM; ##DEBUGGING open FI_FVKM, ">$final_iFVKM_file" or die "Can't write to final iFVKM file $final_iFVKM_file\n"; print FI_FVKM "gene\tisoform\tfinal.iFVK\torig.iFVK\tiFVKM.factor\tiEUMA\t#isoforms\t#measured_isoforms\n"; for my $gene (sort keys %$pAllisoforms){ my $nTr = scalar keys %{$pGene2NM->{$gene}}; my $nTr_measured = scalar keys %{$pIFVKM->{$gene}}; my $iFVKM_factor = (exists $pIFVKM_factor->{$gene})?$pIFVKM_factor->{$gene}:'NA'; for my $isoform (sort keys %{$pAllisoforms->{$gene}}){ my $iEUMA=''; if(exists $piEUMA->{$gene}{$isoform}) { $iEUMA = $piEUMA->{$gene}{$isoform}; } else { $iEUMA = 'below_cut'; } my $final_iFVKM = (exists $pFinal_iFVKM->{$gene} && exists ${$pFinal_iFVKM->{$gene}}{$isoform})?$pFinal_iFVKM->{$gene}{$isoform}:$pIFVKM->{$gene}{$isoform}; print FI_FVKM "$gene\t$isoform\t$final_iFVKM\t$pIFVKM->{$gene}{$isoform}\t$iFVKM_factor\t$iEUMA\t$nTr\t$nTr_measured\n"; } } close FI_FVKM; } sub sum { my $s=0; while(defined(my $x=shift @_)){ $s+=$x; } return $s; }