view NEUMA-1.2.1/merge_LVKM_readcount.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl

if(@ARGV<4) { print "usage: $0 genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gNIR(iNIR)\n"; exit; }
my $option = shift @ARGV;  #either g or i
my $EUMAcut = shift @ARGV;
my $LVKM_dir = shift @ARGV;
my $NR_option = shift @ARGV;

my $suffix = $option."LVKM";

my @sample_list = split/\n/,`ls -1 $LVKM_dir`;
for my $i (0..$#sample_list){
 if($NR_option eq '1'){
   if($sample_list[$i] =~ /\.\d+.-NR.[gi]LVKM$/) { $samples{$`}=1; }
 }
 else {
   if($sample_list[$i] =~ /\.\d+.[gi]LVKM$/) { $samples{$`}=1; }
 }
}

@sample_list = keys %samples;


for my $sample (@sample_list){
  next if($sample eq '');
  my $file;
  if($NR_option eq '1'){
    $file =  "$LVKM_dir/$sample.$EUMAcut.-NR.$suffix";
  }
  else {
    $file =  "$LVKM_dir/$sample.$EUMAcut.$suffix";
  }

  print STDERR "$file\n";  #DEBUGGING
  open IN,$file or die "Can't open LVKM file $file\n";
  <IN>;
  while(<IN>){
    chomp;

    my $gene;
    if($option eq 'g'){
      my ($gid,$symbol,$origGFVK,$gEUMA) = (split/\t/)[0,1,5,7];
      $gene = "$gid\t$symbol";
      $temp = $origGFVK * $gEUMA;
      if($temp == 0){
        $EXPR{$gene}{$sample} = 0;
      }else{
        $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
      }
    }
    elsif($option eq 'i'){
      my ($gid,$symbol,$NM,$finalIFVK,$iEUMA) = (split/\t/)[0,1,2,5,8];
      $gene = "$gid\t$symbol\t$NM";
      $temp = $finalIFVK * $iEUMA;
      if($temp == 0){
        $EXPR{$gene}{$sample} = 0;
      }else{
        $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
      }
    }
  }
  close IN;
}



$"="\t";
if($option eq 'g') { print "gene.id\tgene.symbol\t@sample_list\n"; }
elsif($option eq 'i') { print "gene.id\tgene.symbol\tisoform\t@sample_list\n"; }
for my $gene (keys %EXPR){
 print "$gene";
 for my $sample (@sample_list){
   if(exists ${$EXPR{$gene}}{$sample}){
     print "\t$EXPR{$gene}{$sample}";
   }
   else { print "\tNA"; }
 }
 print "\n";
}