50
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Chentt
|
|
4 #Email: chentt@big.ac.cn
|
|
5 #Date: 2014/04/09
|
|
6 #Modified:
|
|
7 #Description: islands merged of merged samples
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12
|
|
13 my %opts;
|
|
14 GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h");
|
|
15 if (!(defined $opts{i} and defined $opts{d} and defined $opts{N} and defined $opts{mark} and defined $opts{t} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
|
|
16 &usage;
|
|
17 }
|
|
18
|
|
19 my $filein=$opts{'i'};
|
|
20 my $fileout=$opts{'o'};
|
|
21 my $distance=$opts{'d'};
|
|
22 my $tempout=$opts{'t'};
|
|
23 my $mark=$opts{'mark'};
|
|
24 my @sample=split/\#/,$mark;
|
|
25 $mark=join"\"\t\"",@sample;
|
|
26
|
|
27 open IN,"<$filein"; #input file
|
|
28 open OUT,">$fileout"; #output file
|
|
29 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
|
|
30 open TMP,">$tempout";
|
|
31 print TMP "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
|
|
32 my %hash;
|
|
33 while (my $aline=<IN>) {
|
|
34 chomp $aline;
|
|
35 if($aline=~/^\#/){
|
|
36 #print OUT "$aline\n";
|
|
37 next;
|
|
38 }
|
|
39 my @tmp=split/\t/,$aline;
|
|
40 my $chr=shift @tmp;
|
|
41 #shift @tmp;
|
|
42 push @{$hash{$chr}},[@tmp];
|
|
43 }
|
|
44
|
|
45 close IN;
|
|
46
|
|
47 foreach my $key (keys %hash) {
|
|
48 my @tag=sort{$a->[1] <=> $b->[1]} @{$hash{$key}};
|
|
49 my @sample;
|
|
50 my $start=$tag[0][1];
|
|
51 my $end=$tag[0][2];
|
|
52 push @sample,[@{$tag[0]}];
|
|
53 for (my $i=1;$i<@tag-1;$i++) {
|
|
54 if ($tag[$i][1]-$end<=$distance) {
|
|
55 if ($tag[$i][2]>$end) {
|
|
56 $end=$tag[$i][2];
|
|
57 }
|
|
58 push @sample,[@{$tag[$i]}];
|
|
59 }
|
|
60 else{
|
|
61 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
|
|
62 my $cluster_exp=join"\t",@cluster_exp;
|
|
63 if ($max_length>30) {
|
|
64 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
|
|
65 $max_length="\>30";
|
|
66 }
|
|
67 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
|
|
68 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
|
|
69 $start=$tag[$i][1];
|
|
70 $end=$tag[$i][2];
|
|
71
|
|
72 @sample=();
|
|
73 push @sample,[@{$tag[$i]}];
|
|
74 }
|
|
75 }
|
|
76 if ($tag[$#tag][1]-$end<=$distance) {
|
|
77 if ($tag[$#tag][2]>$end) {
|
|
78 $end=$tag[$#tag][2];
|
|
79 }
|
|
80 push @sample,[@{$tag[$#tag]}];
|
|
81 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
|
|
82 my $cluster_exp=join"\t",@cluster_exp;
|
|
83 if ($max_length>30) {
|
|
84 $max_length="\>30";
|
|
85 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
|
|
86 }
|
|
87 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
|
|
88 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
|
|
89 }
|
|
90 else{
|
|
91 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
|
|
92 my $cluster_exp=join"\t",@cluster_exp;
|
|
93 if ($max_length>30) {
|
|
94 $max_length="\>30";
|
|
95 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
|
|
96 }
|
|
97 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
|
|
98 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
|
|
99
|
|
100 }
|
|
101 }
|
|
102 close OUT;
|
|
103 close TMP;
|
|
104 sub Max_length{
|
|
105 my @exp=@{$_[0]};
|
|
106 my %sample_length;
|
|
107 my $total_exp;
|
|
108 my @each;
|
|
109 my @tag;
|
|
110 for (my $i=0;$i<=$#exp ;$i++) {
|
|
111 my $length=$exp[$i][2]-$exp[$i][1]+1;
|
|
112 #if ($length>30) {
|
|
113 # $length=40;
|
|
114 #}
|
|
115 my $exp=0;
|
|
116 foreach (1..$opts{'N'}) {
|
|
117 $exp+=$exp[$i][$_+2];
|
|
118 $each[$_-1]+=$exp[$i][$_+2];
|
|
119 }
|
|
120 $sample_length{$length}+=$exp;
|
|
121 $total_exp+=$exp;
|
|
122 push @tag,($exp[$i][1].",".$exp[$i][2].",".$exp[$i][0].",".$exp);
|
|
123 }
|
|
124 my $max=0;
|
|
125 my $max_key;
|
|
126 foreach my $key (sort keys %sample_length) {
|
|
127 my $p=$sample_length{$key}/$total_exp;
|
|
128 if ($p>$max) {
|
|
129 $max=$p;
|
|
130 $max_key=$key;
|
|
131 }
|
|
132 $sample_length{$key}=sprintf("%.2f",$p);
|
|
133 }
|
|
134 my $tag_n=@tag;
|
|
135 my $tag=join";",@tag;
|
|
136 $tag=$tag_n."\t".$tag;
|
|
137 return($max_key,$sample_length{$max_key},$tag,@each);
|
|
138 }
|
|
139
|
|
140 sub usage{
|
|
141 print <<"USAGE";
|
|
142 Version $version
|
|
143 Usage:
|
|
144 $0 -i -o -d -N -t -mark
|
|
145 options:
|
|
146 -i input file
|
|
147 -d distance of two islands
|
|
148 -mark sample name;
|
|
149 -o output file
|
|
150 -N sample number
|
|
151 -t temp output file
|
|
152 -h help
|
|
153 USAGE
|
|
154 exit(1);
|
|
155 }
|
|
156
|