Mercurial > repos > big-tiandm > mirplant
comparison filterReadsByLength.pl @ 1:f97b6074c41b draft default tip
Uploaded
author | big-tiandm |
---|---|
date | Wed, 02 Jul 2014 02:41:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:217f9663d891 | 1:f97b6074c41b |
---|---|
1 #!/usr/bin/perl -w | |
2 #Filename: | |
3 #Author: Tian Dongmei | |
4 #Email: tiandm@big.ac.cn | |
5 #Date: 2010-01 | |
6 #Modified: | |
7 #Description: | |
# Script version, printed by usage().
my $version=1.00;

use strict;
use Getopt::Long;
use File::Basename;

# Command-line options (stored in %opts):
#   -i     input file                          (required)
#   -o     output file                         (required)
#   -min   minimum read length to keep         (required, integer)
#   -max   maximum read length to keep         (required, integer)
#   -mark  comma-separated sample names        (optional)
#   -h     print the help text
my %opts;

# GetOptions returns false when it meets an unknown option or a value of the
# wrong type; treat that as a usage error instead of silently carrying on.
my $parsed_ok = GetOptions(\%opts,"i=s","min=i","max=i","o=s","mark:s","h");

my $have_required = defined $opts{i}   && defined $opts{o}
                 && defined $opts{min} && defined $opts{max};

if (!$parsed_ok || !$have_required || defined $opts{h}) {
    usage();    # prints the help text and exits
}

# An inverted range keeps no reads at all; this is almost certainly a typo,
# so say so, but keep going because the length statistics are still produced.
if ($opts{min} > $opts{max}) {
    warn "Warning: -min ($opts{min}) is greater than -max ($opts{max}); no reads will be kept\n";
}

# "mark:s" allows -mark to be given without a value, which yields an empty
# string. Fall back to the default label then, and also when the value
# contains nothing but commas, so that there is always at least one sample.
my $mark = (defined $opts{'mark'} && length $opts{'mark'}) ? $opts{'mark'} : "Sample";
my @mark = split /,/, $mark;
@mark = ("Sample") unless @mark;
22 | |
23 | |
# Filter pass.
#
# The input is read two lines at a time: an ID line followed by a sequence
# line. Every record contributes to the per-length statistics; records whose
# sequence length lies within [-min, -max] are copied to the output file.
#
#   %hash  : sequence length => [ per-sample number of records whose count
#                                 for that sample is greater than zero ]
#   %reads : sequence length => [ per-sample sum of the counts ]
open my $out_fh, '>', $opts{o} or die "Cannot open '$opts{o}' for writing: $!\n";
open my $in_fh,  '<', $opts{i} or die "Cannot open '$opts{i}' for reading: $!\n";
my %hash;my %reads;
while (my $aline = <$in_fh>) {
    chomp $aline;
    my $seq = <$in_fh>;

    # An ID line with no sequence line after it means the input is truncated.
    unless (defined $seq) {
        warn "Warning: '$opts{i}' ends with an ID line that has no sequence: $aline\n";
        last;
    }
    chomp $seq;
    my $seq_len = length $seq;

    # IDs end in ":<n1>_<n2>_..._x<total>", one count per sample.
    # (The character class used to contain a stray "|", which is a literal
    # inside [...] and not an alternation.)
    if ($aline =~ /:([\d_]+)_x(\d+)$/) {
        my @ss = split /_/, $1;
        for my $i (0 .. $#ss) {
            # An empty field (e.g. from a doubled "_") counts as zero.
            my $count = length($ss[$i]) ? $ss[$i] : 0;

            # Always touch both slots so that every sample column exists for
            # this length, even when its count is zero. Leaving holes here
            # produces empty entries in the generated R vector.
            $hash{$seq_len}[$i]  += ($count > 0 ? 1 : 0);
            $reads{$seq_len}[$i] += $count;
        }
    }
    #else{$reads{length($seq)}+=1;}

    if ($seq_len >= $opts{'min'} && $seq_len <= $opts{'max'}) {
        print {$out_fh} "$aline\n$seq\n";
    }
}
close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $out_fh or die "Error while writing '$opts{o}': $!\n";
47 | |
# Report pass.
#
# Writes "reads_length_distribution.txt" (two tables: records per length and
# summed counts per length, one column per sample) next to the output file,
# then generates "length_distribution.R" and runs it through R to draw
# Tags_length.png and Reads_length.png in the same directory.
my $dir=dirname($opts{'o'});

# Everything below is created inside $dir. Once we have changed into it, a
# relative $dir would no longer resolve (from inside "out", "out/x" means
# "out/out/x"), so the files are addressed by bare name from here on.
chdir $dir or die "Cannot change directory to '$dir': $!\n";
my $lengthfile = "reads_length_distribution.txt";
my $rscript    = "length_distribution.R";

my $samNo  = @mark;
my @length = sort { $a <=> $b } keys %hash;

# Return one table row as a list with exactly-defined values: undef slots
# become 0 and short rows are padded with 0 up to $samNo columns, so the R
# vector always describes a rectangular matrix.
# NOTE(review): rows wider than $samNo (more counts in the ID than sample
# names given with -mark) are passed through unchanged - confirm the intended
# behaviour for that case.
my $row_for = sub {
    my ($table, $len) = @_;
    my @row = map { defined $_ ? $_ : 0 } @{ $table->{$len} || [] };
    push @row, 0 while @row < $samNo;
    return @row;
};

open my $dist_fh, '>', $lengthfile
    or die "Cannot open '$dir/$lengthfile' for writing: $!\n";

print {$dist_fh} "Tags length\t@mark\n";
my @tag_cells;
foreach my $len (@length) {
    my @row = $row_for->(\%hash, $len);
    print {$dist_fh} $len,"\t@row\n";
    push @tag_cells, join(", ", @row);
}

print {$dist_fh} "\nReads length\t@mark\n";
my @read_cells;
foreach my $len (@length) {
    my @row = $row_for->(\%reads, $len);
    print {$dist_fh} $len,"\t@row\n";
    push @read_cells, join(", ", @row);
}
close $dist_fh or die "Error while writing '$dir/$lengthfile': $!\n";

if (!@length) {
    # With no data the R script would call matrix() and max() on an empty
    # vector and fail, so do not generate or run it.
    warn "Warning: no read ID matched ':<counts>_x<total>'; length plots were not generated\n";
}
else {
    my $tag_values  = join ",", @tag_cells;
    my $read_values = join ",", @read_cells;
    my $lengths     = join ",", @length;

    # Sample names end up inside R string literals: escape quotes/backslashes.
    my @r_marks = map { (my $name = $_) =~ s/(["\\])/\\$1/g; $name } @mark;
    my $marks   = join "\",\"", @r_marks;

    open my $r_fh, '>', $rscript
        or die "Cannot open '$dir/$rscript' for writing: $!\n";

    # args.legend must be a list of arguments for legend(); a bare string is
    # ignored by barplot() and the legend stays in its default position.
    print {$r_fh} "a<-c($tag_values)
b<-matrix(a,ncol=$samNo,byrow=T)
cl<-colors()
names=c($lengths)
legends=c(\"$marks\")
png(\"Tags_length.png\",width=800,height=600)
barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=list(x=\"topleft\"))
abline(h=0)
dev.off()

";

    print {$r_fh} "a<-c($read_values)\n
b<-matrix(a,ncol=$samNo,byrow=T)

png(\"Reads_length.png\",width=800,height=600)
barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=list(x=\"topleft\"))
abline(h=0)
dev.off()

";
    close $r_fh or die "Error while writing '$dir/$rscript': $!\n";

    # List form: no shell is involved, so odd characters in paths are harmless.
    # Plotting stays best-effort: a failure is reported but is not fatal.
    my $status = system('R', 'CMD', 'BATCH', $rscript);
    if ($status != 0) {
        warn "Warning: 'R CMD BATCH $rscript' failed (status $status); plots were not generated\n";
    }
}

# Clean-up of the R by-products is intentionally disabled:
#system ("rm $dir/length_distribution.R");
#system ("rm $dir/length_distribution.Rout");
#system ("rm $dir/.RData");
# usage - print the command-line help and terminate the program.
#
# Interpolates the file-level $version and the program name ($0) into the
# help text, writes it to STDOUT and exits with status 1. Never returns.
sub usage {
    my @help_lines = (
        "Version $version",
        "Usage:",
        "$0 -i -o -min -max -mark",
        "options:",
        "",
        "-i input file",
        "-o output file",
        "-min reads min length.",
        "-max reads max length.",
        "-mark string #sample name eg: samA,samB,samC",
        "-h help",
    );

    # One help line per output line, in the order listed above.
    print map { "$_\n" } @help_lines;

    exit(1);
}
121 |