annotate Annotate.pl @ 26:80d514e98f72 draft

Uploaded
author big-tiandm
date Thu, 06 Nov 2014 02:18:09 -0500
parents 07745c0958dd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
#!/usr/bin/perl -w
#Filename:
#Author: Chentt
#Email: chentt@big.ac.cn
#Date: 2014/4/10
#Modified:
#Description: cluster annotate by priority
use strict;
use warnings;
use Getopt::Long;

# Version number interpolated into the usage message.
my $version=1.00;

# Command-line options:
#   -i <file>  input file (required)
#   -g <file>  genelist file (required)
#   -o <file>  output file (required)
#   -d <int>   length of the upstream/downstream flank (optional)
#   -t <str>   accepted by the parser but not read anywhere in this script
#   -h         print usage and exit
my %opts;

# GetOptions returns false when it meets an unknown or malformed option;
# stop with the usage text instead of running with a partial option set.
GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h") or usage(1);

# An explicit help request is not an error, so ask usage for status 0.
usage(0) if defined $opts{h};

# -i, -g and -o are all mandatory.
if (!(defined $opts{i} and defined $opts{g} and defined $opts{o})) {
    usage(1);
}
# Flank length used for the upstream/downstream tests; -d overrides the
# default of 1000.
my $dis=defined $opts{'d'}? $opts{'d'}:1000;
die "-d must be a non-negative integer (got $dis)\n" if $dis < 0;

# Gene table filled from the -g file:
#   $gene{chr}{geneID} = [start, end, strand]
my %gene;

# Three-argument open on a lexical handle; fail loudly, because an unreadable
# genelist would otherwise leave %gene empty without any diagnostic.
open my $gene_fh, '<', $opts{g}
    or die "Cannot open genelist file '$opts{g}': $!\n";
while (my $aline = <$gene_fh>) {
    chomp $aline;
    $aline =~ s/\r$//;               # tolerate CRLF so the strand stays "+"/"-"
    next if $aline =~ /^\#/;         # comment / header line
    next if $aline !~ /\S/;          # blank line
    my @tmp = split /\t/, $aline;    # ID chr start end strand
    if (@tmp < 5) {
        warn "Skipping genelist line $. of '$opts{g}': expected 5 tab-separated fields\n";
        next;
    }
    $gene{$tmp[1]}{$tmp[0]} = [$tmp[2], $tmp[3], $tmp[4]];
}
close $gene_fh;

# Annotate every row of the -i file against the gene table and write the row,
# plus one extra tab-separated annotation column, to the -o file.
#
# The first column of a data row is read as "<chr>:<start>-<end>".  For each
# gene on that chromosome (after rewriting "Chr0" to "Chr" in the name):
#   * cluster overlaps the gene              -> "gene-body;<gene>;<strand>"
#   * cluster reaches into the $dis flank
#     before the gene start                  -> "up1-kb" on "+", "down1-kb" on "-"
#   * cluster reaches into the $dis flank
#     after the gene end                     -> "down1-kb" on "+", "up1-kb" on "-"
# All hits are joined with tabs; a row with no hit gets "intergenic".
# The "1-kb" labels are literal and do not change when -d is not 1000.
# NOTE(review): the comparisons are strict (< and >), so a cluster that only
# touches a gene boundary coordinate is not counted as overlapping; confirm
# this matches the coordinate convention of the inputs.
my $filein=$opts{'i'};
my $fileout=$opts{'o'};

open my $in_fh, '<', $filein
    or die "Cannot open input file '$filein': $!\n";
open my $out_fh, '>', $fileout
    or die "Cannot open output file '$fileout': $!\n";

while (my $aline = <$in_fh>) {
    chomp $aline;

    # Header/comment rows are passed through with the new column name.
    if ($aline =~ /^\#/) {
        print {$out_fh} "$aline\tP_annotate\n";
        next;
    }

    my @tmp  = split /\t/, $aline;
    my @id   = split /:/, (defined $tmp[0] ? $tmp[0] : '');
    my @posi = defined $id[1] ? split(/-/, $id[1]) : ();

    # Without a chromosome and two numeric coordinates the comparisons below
    # would run on undef (treated as 0) and could match genes near position 0.
    # Keep the row in the output but do not try to classify it.
    unless (defined $id[0]
            and @posi >= 2
            and $posi[0] =~ /^\s*\d+/
            and $posi[1] =~ /^\s*\d+/) {
        warn "Line $. of '$filein': cannot parse 'chr:start-end' from first column; reported as intergenic\n";
        print {$out_fh} "$aline\tintergenic\n";
        next;
    }

    $id[0] =~ s/Chr0/Chr/;

    # Fall back to an empty hash so unknown chromosomes are not autovivified
    # into %gene.
    my $genes_on_chr = $gene{$id[0]} || {};

    my @result;
    # Sorted gene IDs make the order of multiple annotations reproducible;
    # plain hash order can differ from one run to the next.
    foreach my $key (sort keys %$genes_on_chr) {
        my ($g_start, $g_end, $strand) = @{ $genes_on_chr->{$key} };

        if ($posi[0] < $g_end && $posi[1] > $g_start) {
            push @result, "gene-body;$key;$strand";
            next;
        }
        if ($posi[0] < $g_start && $posi[1] > $g_start - $dis) {
            push @result, "up1-kb;$key;$strand"   if $strand eq "+";
            push @result, "down1-kb;$key;$strand" if $strand eq "-";
            next;
        }
        if ($posi[0] < $g_end + $dis && $posi[1] > $g_end) {
            push @result, "down1-kb;$key;$strand" if $strand eq "+";
            push @result, "up1-kb;$key;$strand"   if $strand eq "-";
            next;
        }
    }

    my $result = @result ? join("\t", @result) : "intergenic";
    print {$out_fh} "$aline\t$result\n";
}

close $in_fh;
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close $out_fh or die "Error while writing '$fileout': $!\n";

# usage([$status])
#
# Print the version and command-line help, then terminate the program.
#
#   $status  optional exit status; defaults to 1, which is what a call with
#            no arguments has always produced.
#
# The text goes to STDOUT when $status is 0 (help was asked for) and to
# STDERR otherwise, so error output does not mix with regular output.
# This sub never returns.
sub usage{
    my ($status) = @_;
    $status = 1 unless defined $status;
    my $stream = $status ? \*STDERR : \*STDOUT;
    print {$stream} <<"USAGE";
Version $version
Usage:
$0 -i -o -g -d
options:
-i input file
-g genelist file
-d int the length of the upstream and downstream,default 1000
-o output file
-h help
USAGE
    exit($status);
}
