annotate NEUMA-1.2.1/merge_LVKM_readcount.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
1 #!/usr/bin/perl
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
2
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
3 if(@ARGV<4) { print "usage: $0 genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gNIR(iNIR)\n"; exit; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
4 my $option = shift @ARGV; #either g or i
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
5 my $EUMAcut = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
6 my $LVKM_dir = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
7 my $NR_option = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
8
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
9 my $suffix = $option."LVKM";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
10
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
11 my @sample_list = split/\n/,`ls -1 $LVKM_dir`;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
12 for my $i (0..$#sample_list){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
13 if($NR_option eq '1'){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
14 if($sample_list[$i] =~ /\.\d+.-NR.[gi]LVKM$/) { $samples{$`}=1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
15 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
16 else {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
17 if($sample_list[$i] =~ /\.\d+.[gi]LVKM$/) { $samples{$`}=1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
18 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
19 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
20
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
21 @sample_list = keys %samples;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
22
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
23
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
24 for my $sample (@sample_list){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
25 next if($sample eq '');
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
26 my $file;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
27 if($NR_option eq '1'){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
28 $file = "$LVKM_dir/$sample.$EUMAcut.-NR.$suffix";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
29 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
30 else {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
31 $file = "$LVKM_dir/$sample.$EUMAcut.$suffix";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
32 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
33
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
34 print STDERR "$file\n"; #DEBUGGING
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
35 open IN,$file or die "Can't open LVKM file $file\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
36 <IN>;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
37 while(<IN>){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
38 chomp;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
39
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
40 my $gene;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
41 if($option eq 'g'){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
42 my ($gid,$symbol,$origGFVK,$gEUMA) = (split/\t/)[0,1,5,7];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
43 $gene = "$gid\t$symbol";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
44 $temp = $origGFVK * $gEUMA;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
45 if($temp == 0){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
46 $EXPR{$gene}{$sample} = 0;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
47 }else{
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
48 $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
49 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
50 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
51 elsif($option eq 'i'){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
52 my ($gid,$symbol,$NM,$finalIFVK,$iEUMA) = (split/\t/)[0,1,2,5,8];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
53 $gene = "$gid\t$symbol\t$NM";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
54 $temp = $finalIFVK * $iEUMA;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
55 if($temp == 0){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
56 $EXPR{$gene}{$sample} = 0;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
57 }else{
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
58 $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
59 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
60 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
61 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
62 close IN;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
63 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
64
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
65
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
66
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
67 $"="\t";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
68 if($option eq 'g') { print "gene.id\tgene.symbol\t@sample_list\n"; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
69 elsif($option eq 'i') { print "gene.id\tgene.symbol\tisoform\t@sample_list\n"; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
70 for my $gene (keys %EXPR){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
71 print "$gene";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
72 for my $sample (@sample_list){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
73 if(exists ${$EXPR{$gene}}{$sample}){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
74 print "\t$EXPR{$gene}{$sample}";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
75 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
76 else { print "\tNA"; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
77 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
78 print "\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
79 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
80
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
81
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
82