#!/usr/bin/perl -w
#Filename:
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2010-01
#Modified:
#Description: Reads two-line records (a header line followed by a sequence
#   line) from -i, writes the records whose sequence length lies within
#   [-min, -max] to -o, and tabulates, per sequence length and per sample
#   column, the number of tags and the number of reads found in headers that
#   end in ":<n1>_<n2>_..._x<total>". The tables are written to
#   reads_length_distribution.txt and plotted by a generated R script
#   (Tags_length.png, Reads_length.png), all in the directory of -o.
use strict;
use warnings;
use Getopt::Long;
use File::Basename;
use File::Spec;

# Kept as a string: the numeric literal 1.00 would be printed as "1".
my $version = '1.00';

my %opts;
GetOptions(\%opts, "i=s", "min=i", "max=i", "o=s", "mark:s", "h");
if (   !(      defined $opts{i}
            and defined $opts{o}
            and defined $opts{min}
            and defined $opts{max})
    || defined $opts{h})
{    #necessary arguments
    usage();
}

# Sample names are '#'-separated. "-mark" given without a value yields an
# empty string, which would leave no sample names at all, so fall back to the
# default in that case as well.
my $mark = (defined $opts{'mark'} && length $opts{'mark'}) ? $opts{'mark'} : "Sample";
my @mark = split /\#/, $mark;

# Open the input first so that an unreadable input does not truncate -o.
open my $in_fh, '<', $opts{i}
    or die "Cannot open input file '$opts{i}': $!\n";
open my $out_fh, '>', $opts{o}
    or die "Cannot open output file '$opts{o}': $!\n";

my %tag_count;     # sequence length => [ per sample: tags with count > 0 ]
my %read_count;    # sequence length => [ per sample: summed read counts  ]

while (my $header = <$in_fh>) {
    chomp $header;
    my $seq = <$in_fh>;
    if (!defined $seq) {
        warn "Header without a sequence line at end of '$opts{i}', ignored: $header\n";
        last;
    }
    chomp $seq;
    my $seq_len = length $seq;

    # Per-sample counts sit between the last ':' and the "_x<total>" suffix.
    # The statistics cover every record, not only those passing -min/-max.
    if ($header =~ /:([\d_]+)_x\d+$/) {
        my @sample_counts = split /_/, $1;
        for my $i (0 .. $#sample_counts) {
            $tag_count{$seq_len}[$i]  += $sample_counts[$i] > 0 ? 1 : 0;
            $read_count{$seq_len}[$i] += $sample_counts[$i];
        }
    }

    if ($seq_len >= $opts{'min'} && $seq_len <= $opts{'max'}) {
        print {$out_fh} "$header\n$seq\n";
    }
}
close $in_fh;
close $out_fh or die "Cannot write output file '$opts{o}': $!\n";

# Resolve the output directory to an absolute path BEFORE changing into it;
# a relative directory would otherwise be looked up a second time from inside
# itself. The chdir makes R drop its PNG and .Rout files next to -o.
my $dir     = dirname($opts{'o'});
my $abs_dir = File::Spec->rel2abs($dir);
chdir $abs_dir or die "Cannot change to directory '$abs_dir': $!\n";

my $lengthfile = $dir . "/reads_length_distribution.txt";
print "$lengthfile\n";

my $table_path   = "$abs_dir/reads_length_distribution.txt";
my $rscript_path = "$abs_dir/length_distribution.R";
open my $table_fh, '>', $table_path
    or die "Cannot open '$table_path': $!\n";
open my $r_fh, '>', $rscript_path
    or die "Cannot open '$rscript_path': $!\n";

my $samNo   = @mark;
my @length  = sort { $a <=> $b } keys %tag_count;
my $lengths = join ",", @length;
my $marks   = join "\",\"", @mark;

# --- tags: number of distinct sequences per length and sample ---
print {$table_fh} "Tags length\t@mark\n";
for my $len (@length) {
    print {$table_fh} $len, "\t@{$tag_count{$len}}\n";
}
my $avalue = join ",", map { join ", ", @{ $tag_count{$_} } } @length;

# NOTE(review): R documents barplot's args.legend as a list of arguments for
# legend(); the bare string "topleft" is presumably ignored. Left unchanged so
# the generated plots stay as they were - confirm before altering.
print {$r_fh} <<"RSCRIPT";
a<-c($avalue)
b<-matrix(a,ncol=$samNo,byrow=T)
cl<-colors()
names=c($lengths)
legends=c("$marks")
png("Tags_length.png",width=800,height=600)
barplot(t(b),beside=TRUE,col=cl[1:$samNo],main="Tags Length Distribution",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend="topleft")
abline(h=0)
dev.off()

RSCRIPT

# --- reads: summed read counts per length and sample ---
print {$table_fh} "\nReads length\t@mark\n";
for my $len (@length) {
    print {$table_fh} $len, "\t@{$read_count{$len}}\n";
}
$avalue = join ",", map { join ", ", @{ $read_count{$_} } } @length;

# Reuses cl, names and legends defined by the first part of the R script.
print {$r_fh} <<"RSCRIPT";
a<-c($avalue)

b<-matrix(a,ncol=$samNo,byrow=T)

png("Reads_length.png",width=800,height=600)
barplot(t(b),beside=TRUE,col=cl[1:$samNo],main="Reads Length Distribution",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend="topleft")
abline(h=0)
dev.off()

RSCRIPT

close $table_fh or die "Cannot write '$table_path': $!\n";
close $r_fh     or die "Cannot write '$rscript_path': $!\n";

# List form avoids the shell, so paths with spaces or metacharacters are safe.
# A failing R run is reported but does not change the exit status, because the
# filtered output and the count tables have already been written.
system('R', 'CMD', 'BATCH', $rscript_path) == 0
    or warn "R CMD BATCH $rscript_path failed (status $?)\n";

#system ("rm $dir/length_distribution.R");
#system ("rm $dir/length_distribution.Rout");
#system ("rm $dir/.RData");
# usage
#   Prints the version (from the file-level $version) and the command-line
#   help to standard output, then terminates the program with exit status 1.
#   Takes no arguments and never returns.
sub usage {
    my @help_lines = (
        "Version $version",
        "Usage:",
        "$0 -i -o -min -max -mark",
        "options:",
        "",
        "-i input file",
        "-o output file",
        "-min reads min length.",
        "-max reads max length.",
        "-mark string #sample name eg: samA#samB#samC",
        "-h help",
    );

    # One newline per line, matching the layout of the help text.
    print map { "$_\n" } @help_lines;
    exit(1);
}