annotate NEUMA-1.2.1/bowtieout2mappingstat.2.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
1 #!/usr/bin/perl
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
2 use Getopt::Long;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
3 # This version includes max_insert_length filtered mapping stat, optionally. (version 2)
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
4
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
5
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
6 my $max_insert_length;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
7 my $readlength;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
8 my $datatype = 'E'; # E or R.
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
9 &GetOptions ( 'm|max_insert_length=i' => \$max_insert_length,
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
10 'l|readlength=i' => \$readlength,
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
11 'd|datatype=s' => \$datatype
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
12 );
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
13
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
14 if(defined($max_insert_length) && !defined($readlength)) { die "readlength (-l|--readlength) must be defined if -m (--max_insert_length) is defined.\n"; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
15 if(@ARGV<3){ &help; exit; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
16
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
17
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
18 my $bowtie = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
19 my $sample_name = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
20 my $PE = shift @ARGV;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
21
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
22 my $prev_head='';
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
23 my $NM=0; my $NR=0; my $NM_lf = 0; my $NR_lf = 0; # lf : insert-length-filtered version
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
24 my $countNM=0; my $countNM_lf =0;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
25 my $countNR=0; my $countNR_lf = 0;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
26 my $countAll=0; my $countAll_lf=0;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
27
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
28
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
29 open IN, $bowtie or die "Can't open bowtiefile $bowtie\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
30 while(<IN>){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
31 chomp;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
32 my ($head,$tr,$pos1)= (split/\t/)[0,2,3];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
33 if($datatype eq 'R') { ($tr) = (split/\|/,$tr)[3]; $tr=~s/\.\d+$//; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
34
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
35 if($PE==1){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
36 chomp(my $secondline = <IN>);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
37 my ($head2,$pos2) = (split/\t/,$secondline)[0,3];
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
38 $head = &min($head,$head2);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
39
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
40 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
41
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
42 if($prev_head eq $head) {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
43
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
44 if(defined($max_insert_length) && $pos2+$readlength-$pos1 <= $max_insert_length) {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
45 if($tr=~/^NM_/) { $NM_lf = 1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
46 elsif($tr=~/^NR_/) { $NR_lf = 1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
47 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
48
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
49 if($tr=~/^NM_/) { $NM=1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
50 elsif($tr=~/^NR_/) { $NR=1; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
51 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
52 else {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
53
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
54 if(defined($max_insert_length) && $pos2+$readlength-$pos1 <= $max_insert_length) {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
55 if($NM_lf==1) { $countNM_lf++; $NM_lf=0; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
56 if($NR_lf==1) { $countNR_lf++; $NR_lf=0; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
57 $countAll_lf++;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
58 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
59 if($NM==1) { $countNM++; $NM=0; } ## if a single read is mapped to both NR and NM, it is counted once for each. Therefore total count may not be the same as the sum of NR and NM counts.
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
60 if($NR==1) { $countNR++; $NR=0; }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
61 $countAll++;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
62 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
63 $prev_head=$head;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
64 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
65 # no need to do additional last line
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
66 close IN;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
67
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
68
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
69 if($datatype eq 'E') { ($countNM,$countNR,$countNM_lf,$countNM_lf) = ('NA','NA','NA','NA'); }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
70
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
71 $"="\t";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
72 if(defined($max_insert_length)){
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
73 print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\tmRNA_mapped(lenfiltered)\tncRNA_mapped(lenfiltered)\ttotalRNA_mapped(lenfiltered)\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
74 print "$sample_name\t$countNM\t$countNR\t$countAll\t$countNM_lf\t$countNR_lf\t$countAll_lf\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
75 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
76 else {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
77 print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
78 print "$sample_name\t$countNM\t$countNR\t$countAll\n";
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
79 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
80
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
81
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
82
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
83 sub min {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
84 if($_[0] lt $_[1]) { $_[0] } else { $_[1] };
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
85 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
86
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
87
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
88 sub help {
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
89 print STDERR<<EOF;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
90
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
91 usage: $0 <options> bowtieoutfile samplename PE(1/0) (> mappingstat)
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
92
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
93 Options:
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
94 -m|--max_insert_length <max_insert_length> : the output includes insert-length-filtered mapping stats as well. -l(--readlength) option must be accompanied. This option is valid only for paired-end data. (For single-end, it is ignored)
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
95 -l|--readlength <readlength> : read length. This is required only when -m(--max_insert_length) option is used.
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
96 -d|--datatype <datatype> : Either E (Ensembl) or R (Refseq). Use R if the data is Refseq and the transcript names are in format 'gi|xxx|xxx|name'. (Default E)
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
97
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
98 EOF
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
99 }
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
100
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
101
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
102
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
103