annotate conventional.pl @ 52:8b8c356e6db5 draft

Uploaded
author big-tiandm
date Fri, 05 Dec 2014 01:28:46 -0500
parents 7b5a48b972e9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
50
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Chentt
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
4 #Email: chentt@big.ac.cn
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2014/04/09
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
7 #Description: islands merged of merged samples
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
9
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
10 use strict;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
11 use Getopt::Long;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
12
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
my %opts;
GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h");

# Every option except -h is mandatory; print the help text and exit when
# one is missing or when help was requested explicitly.
if (defined $opts{h} or grep { !defined $opts{$_} } qw(i d N mark t o)) {
    usage();
}

my $filein   = $opts{'i'};
my $fileout  = $opts{'o'};
my $distance = $opts{'d'};
my $tempout  = $opts{'t'};

# -mark carries the sample names separated by '#'; they become the quoted,
# tab-separated trailing columns of the output header.
my @sample_names = split /\#/, $opts{'mark'};
my $mark = join "\"\t\"", @sample_names;

open my $in_fh,  '<', $filein  or die "Cannot open input file '$filein': $!\n";
open my $out_fh, '>', $fileout or die "Cannot open output file '$fileout': $!\n";
open my $tmp_fh, '>', $tempout or die "Cannot open temp output file '$tempout': $!\n";

print {$out_fh} "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
print {$tmp_fh} "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";

# Group the input rows by their first column (chromosome).  After the shift
# each stored row is [id, start, end, per-sample values...].
my %hash;
while (my $aline = <$in_fh>) {
    chomp $aline;
    next if $aline =~ /^\#/;        # header / comment line
    next if $aline =~ /^\s*$/;      # blank line: would create a bogus entry under the empty key
    my @tmp = split /\t/, $aline;
    my $chr = shift @tmp;
    push @{ $hash{$chr} }, [@tmp];
}
close $in_fh;

# Walk each chromosome's tags in order of start position and merge
# neighbours whose gap to the running cluster end is at most $distance.
# Chromosomes are visited in sorted order so the output is reproducible.
foreach my $key (sort keys %hash) {
    my @tag = sort { $a->[1] <=> $b->[1] } @{ $hash{$key} };

    my @cluster = ( [ @{ $tag[0] } ] );
    my $start   = $tag[0][1];
    my $end     = $tag[0][2];

    # The loop covers every remaining tag, including the last one, so a
    # single-tag chromosome is counted once and a trailing tag that opens
    # a new cluster is still written out below.
    foreach my $i (1 .. $#tag) {
        if ($tag[$i][1] - $end <= $distance) {
            $end = $tag[$i][2] if $tag[$i][2] > $end;
            push @cluster, [ @{ $tag[$i] } ];
            next;
        }

        # Gap too large: flush the finished cluster and start a new one.
        write_cluster($out_fh, $tmp_fh, "${key}:${start}-${end}", \@cluster);
        $start   = $tag[$i][1];
        $end     = $tag[$i][2];
        @cluster = ( [ @{ $tag[$i] } ] );
    }

    # Flush the cluster that is still open at the end of the chromosome.
    write_cluster($out_fh, $tmp_fh, "${key}:${start}-${end}", \@cluster);
}

# Buffered write errors only surface at close time, so check both handles.
close $out_fh or die "Error while closing output file '$fileout': $!\n";
close $tmp_fh or die "Error while closing temp output file '$tempout': $!\n";

# write_cluster($out_fh, $tmp_fh, $region, \@members)
#   Summarises one merged cluster with Max_length() and writes one line to
#   each report.
#     $out_fh  - handle of the main output file
#     $tmp_fh  - handle of the temp (detail) output file
#     $region  - "chr:start-end" label of the cluster
#     $members - array ref of the tag rows belonging to the cluster
#   The temp file always receives the real major length plus the tag
#   details; the main file caps lengths above 30 as ">30" and lists the
#   per-sample sums instead of the tag details.
sub write_cluster {
    my ($out_fh, $tmp_fh, $region, $members) = @_;

    my ($max_length, $max_p, $tag, @cluster_exp) = Max_length($members);
    my $cluster_exp = join "\t", @cluster_exp;

    print {$tmp_fh} "$region\t$max_length"."nt\t$max_p\t$tag\n";

    # Cap only after the temp line was written so it keeps the real value.
    $max_length = "\>30" if $max_length > 30;
    print {$out_fh} "$region\t$max_length"."nt\t$max_p\t$cluster_exp\n";
    return;
}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
# Max_length(\@members [, $n_samples])
#   Summarises the tags of one cluster.
#     \@members  - array ref of rows [id, start, end, value_1 .. value_N]
#     $n_samples - optional number of per-sample value columns (N);
#                  defaults to the -N command line option ($opts{'N'})
#   Returns the list
#     ($major_length, $percent, $tag_info, @per_sample_sums)
#   where
#     $major_length    - tag length (end - start + 1) holding the largest
#                        share of the summed values; ties go to the
#                        shortest length
#     $percent         - that share, formatted with two decimals
#     $tag_info        - "<tag count>\t" followed by the ';'-joined
#                        "start,end,id,sum" entry of every tag
#     @per_sample_sums - column-wise sums over all tags
#   A cluster whose values sum to zero yields a share of "0.00" instead of
#   dying with a division by zero.
sub Max_length{
    my ($members, $n_samples) = @_;
    $n_samples = $opts{'N'} unless defined $n_samples;

    my %exp_of_length;      # tag length => summed values of tags with that length
    my $total_exp = 0;
    my @each;               # per-sample sums over the whole cluster
    my @tag;                # one "start,end,id,sum" entry per tag

    foreach my $member (@{$members}) {
        my ($id, $start, $end) = @{$member}[0, 1, 2];
        my $length = $end - $start + 1;

        my $exp = 0;
        foreach my $col (1 .. $n_samples) {
            # Sample values start at index 3, right after id/start/end.
            $exp += $member->[$col + 2];
            $each[$col - 1] += $member->[$col + 2];
        }

        $exp_of_length{$length} += $exp;
        $total_exp += $exp;
        push @tag, join(",", $start, $end, $id, $exp);
    }

    # Start below zero so a length is chosen even when every share is 0.
    my $max = -1;
    my $max_key;
    # Numeric order: lengths are numbers, and ties keep the first (shortest).
    foreach my $length (sort { $a <=> $b } keys %exp_of_length) {
        my $p = $total_exp ? $exp_of_length{$length} / $total_exp : 0;
        if ($p > $max) {
            $max     = $p;
            $max_key = $length;
        }
    }

    # With no members at all there is no major length and no share.
    my $max_p = defined $max_key ? sprintf("%.2f", $max) : undef;
    my $tag   = scalar(@tag) . "\t" . join(";", @tag);
    return ($max_key, $max_p, $tag, @each);
}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
139
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
# usage()
#   Prints the version banner and the option summary to STDOUT, then ends
#   the program with exit status 1.  Never returns.
sub usage{
    my $help_text = <<"USAGE";
Version $version
Usage:
$0 -i -o -d -N -t -mark
options:
-i input file
-d distance of two islands
-mark sample name;
-o output file
-N sample number
-t temp output file
-h help
USAGE
    print $help_text;
    exit(1);
}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
156