Mercurial > repos > big-tiandm > sirna_plant
comparison conventional.pl @ 0:07745c0958dd draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 18 Sep 2014 21:40:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:07745c0958dd |
---|---|
#!/usr/bin/perl -w
#Filename:
#Author: Chentt
#Email: chentt@big.ac.cn
#Date: 2014/04/09
#Modified:
#Description: islands merged of merged samples
my $version=1.00;

use strict;
use Getopt::Long;

# Command-line options. %opts stays file-scoped because Max_length reads
# $opts{'N'} (the number of per-sample expression columns).
my %opts;
GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h");

# Every option except -h is mandatory; -h always shows the help text.
my @required = qw(i d N mark t o);
if (defined $opts{h} or grep { !defined $opts{$_} } @required) {
    usage();
}

my $filein   = $opts{'i'};
my $fileout  = $opts{'o'};
my $distance = $opts{'d'};
my $tempout  = $opts{'t'};
my $mark     = $opts{'mark'};

# -mark is a '#'-separated list of sample names; each name becomes one
# double-quoted, tab-separated column title in the output header.
my @sample = split /\#/, $mark;
$mark = join "\"\t\"", @sample;

# Three-argument open with lexical handles, and fail loudly: a missing input
# or an unwritable output must not silently produce empty results.
open my $in_fh, '<', $filein
    or die "Cannot open input file '$filein': $!\n";
open my $out_fh, '>', $fileout
    or die "Cannot open output file '$fileout': $!\n";
print {$out_fh} "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
open my $tmp_fh, '>', $tempout
    or die "Cannot open temp output file '$tempout': $!\n";
print {$tmp_fh} "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";

# Group the input rows by their first column (chromosome). After the shift,
# each stored row is [col0, start, end, expr_1 .. expr_N]: index 1 and 2 are
# used as coordinates below, index 3 onwards is summed by Max_length.
my %hash;
while (my $aline = <$in_fh>) {
    chomp $aline;
    next if $aline =~ /^\#/;     # comment / header line
    next if $aline !~ /\S/;      # blank line: nothing to cluster
    my @tmp = split /\t/, $aline;
    my $chr = shift @tmp;
    push @{ $hash{$chr} }, [@tmp];
}
close $in_fh;

# Merge the rows of each chromosome into islands: rows sorted by start are
# chained while the gap to the running island end is at most -d.
# Chromosomes are visited in sorted order so the output is reproducible.
foreach my $key (sort keys %hash) {
    my @tag = sort { $a->[1] <=> $b->[1] } @{ $hash{$key} };

    my $start   = $tag[0][1];
    my $end     = $tag[0][2];
    my @cluster = ($tag[0]);

    # Iterate over ALL remaining rows, including the last one. The island
    # that is still open when the rows run out is flushed once after the
    # loop, so a lone row is counted exactly once and an isolated final row
    # still gets an island of its own.
    for my $i (1 .. $#tag) {
        if ($tag[$i][1] - $end <= $distance) {
            $end = $tag[$i][2] if $tag[$i][2] > $end;
            push @cluster, $tag[$i];
            next;
        }

        # Gap too large: close the current island and start a new one here.
        print_island($out_fh, $tmp_fh, $key, $start, $end, \@cluster);
        $start   = $tag[$i][1];
        $end     = $tag[$i][2];
        @cluster = ($tag[$i]);
    }
    print_island($out_fh, $tmp_fh, $key, $start, $end, \@cluster);
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Error closing output file '$fileout': $!\n";
close $tmp_fh or die "Error closing temp output file '$tempout': $!\n";

# print_island($out_fh, $tmp_fh, $chr, $start, $end, \@rows)
# Summarise one island with Max_length and write one line to each report:
#   temp file   : region, major length, percent, tag count and tag details
#   output file : region, major length, percent, per-sample expression sums
# The temp file always carries the numeric major length; only the output
# file collapses lengths above 30 into the ">30" class.
sub print_island {
    my ($out, $tmp, $chr, $start, $end, $rows) = @_;

    my ($max_length, $max_p, $tag, @cluster_exp) = Max_length($rows);
    my $cluster_exp = join "\t", @cluster_exp;
    my $region      = "${chr}:${start}-${end}";

    print {$tmp} "$region\t$max_length"."nt\t$max_p\t$tag\n";
    $max_length = "\>30" if $max_length > 30;
    print {$out} "$region\t$max_length"."nt\t$max_p\t$cluster_exp\n";
    return;
}
# Max_length(\@rows [, $sample_count])
#
# Summarise the rows that make up one island.
#
# Parameters:
#   \@rows        - array ref of rows shaped [col0, start, end, expr_1 .. expr_N]
#   $sample_count - optional number of expression columns to read per row;
#                   defaults to the -N command-line option ($opts{'N'})
#
# Returns the list ($major_length, $percent, $tag_info, @per_sample) where
#   $major_length - the row length (end - start + 1) carrying the largest
#                   share of the summed expression; ties go to the shortest
#                   length because lengths are visited in numeric order
#   $percent      - that share, formatted with two decimals
#   $tag_info     - "<row count>\t" followed by "start,end,col0,expr" entries
#                   joined with ';'
#   @per_sample   - expression summed over all rows, one value per sample
sub Max_length{
    my ($rows, $sample_count) = @_;
    $sample_count = $opts{'N'} unless defined $sample_count;

    my @exp = @{$rows};
    my %sample_length;      # row length => summed expression of that length
    my $total_exp = 0;
    my @each;               # per-sample expression totals
    my @tag;

    foreach my $row (@exp) {
        my ($id, $start, $end) = @{$row}[0, 1, 2];
        my $length = $end - $start + 1;

        # Expression columns start at index 3 of the row.
        my $exp = 0;
        foreach my $s (1 .. $sample_count) {
            $exp         += $row->[$s + 2];
            $each[$s - 1] += $row->[$s + 2];
        }

        $sample_length{$length} += $exp;
        $total_exp              += $exp;
        push @tag, join(",", $start, $end, $id, $exp);
    }

    my $max = 0;
    my $max_key;
    # Numeric sort: the keys are lengths, and the default string sort would
    # visit e.g. 100 before 21 and so break ties in an arbitrary-looking order.
    foreach my $key (sort { $a <=> $b } keys %sample_length) {
        # An island whose expression is all zero has no meaningful share;
        # report 0 instead of dying on a division by zero.
        my $p = $total_exp ? $sample_length{$key} / $total_exp : 0;
        if (!defined $max_key or $p > $max) {
            $max     = $p;
            $max_key = $key;
        }
        $sample_length{$key} = sprintf("%.2f", $p);
    }

    my $tag_n = @tag;
    my $tag   = $tag_n . "\t" . join(";", @tag);
    return($max_key, $sample_length{$max_key}, $tag, @each);
}
139 | |
# usage: write the help text (script version, invocation line and the list
# of options) to standard output, then end the program with exit status 1.
sub usage{
    my @help_lines = (
        "Version $version",
        "Usage:",
        "$0 -i -o -d -N -t -mark",
        "options:",
        "-i input file",
        "-d distance of two islands",
        "-mark sample name;",
        "-o output file",
        "-N sample number",
        "-t temp output file",
        "-h help",
    );
    print map { "$_\n" } @help_lines;
    exit(1);
}
156 |