Mercurial > repos > big-tiandm > sirna_plant
view Annotate.pl @ 11:a0222bdfe2ac draft
Uploaded
author | big-tiandm |
---|---|
date | Wed, 29 Oct 2014 04:18:44 -0400 |
parents | 07745c0958dd |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Chentt #Email: chentt@big.ac.cn #Date: 2014/4/10 #Modified: #Description: cluster annotate by priority my $version=1.00; use strict; use Getopt::Long; my %opts; GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h"); if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments &usage; } #my $genelistout=$opts{'t'}; my $dis=defined $opts{'d'}? $opts{'d'}:1000; my %gene; #open OUT,">$genelistout"; #output file #print OUT "#ID\tchr\tstart\tend\tstrand\ns"; open IN,"<$opts{g}"; while (my $aline=<IN>) { chomp $aline; next if($aline=~/^\#/); my @tmp=split/\t/,$aline;#ID chr start end strand #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]]; } #while (my $aline=<IN>) { # chomp $aline; # next if($aline=~/^\#/); # my @tmp=split/\t/,$aline; # my $ID; # if ($tmp[2] eq "gene") { # $tmp[0]=~s/Chr\./Chr/; # $tmp[0]=~s/Chr/chr/; # my @infor=split/;/,$tmp[8]; # for (my $i=0;$i<@infor ;$i++) { # if ($infor[$i]=~/Alias\=(\S+)$/) { # $ID=$1; # last; # } # } # $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] # print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; # } #} close IN; #close OUT; my $filein=$opts{'i'}; my $fileout=$opts{'o'}; open IN,"<$filein"; #input file open OUT,">$fileout"; #output file while (my $aline=<IN>) { chomp $aline; my @tmp=split/\t/,$aline; if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next} my @result; #shift @tmp; my @id=split/:/,$tmp[0]; $id[0]=~s/Chr0/Chr/; my @posi=split/-/,$id[1]; foreach my $key (keys %{$gene{$id[0]}}) { if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) { push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}"; next; } #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) { if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) { push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); next; } #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) { if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) { push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); next; } } my $result; if (!(@result)) { $result="intergenic"; } elsif($#result==0){ $result=$result[0]; } else{ $result=join "\t",@result; } # else{ # my $te_num=0; # my @te_overlap; # my @te_up_down; # my @non_overlap; # my @non_up_down; # for (my $k=0;$k<@result ;$k++) { # my @rr=split/\;/,$result[$k]; # if ($rr[3] eq "Y") { # $te_num++; # if ($rr[0] eq "overlap") { # push @te_overlap,$result[$k]; # } # else{ # push @te_up_down,$result[$k]; # } # } # else{ # if ($rr[0] eq "overlap") { # push @non_overlap,$result[$k]; # } # else{ # push @non_up_down,$result[$k]; # } # } # } # if ($te_num==0) {#non TE # if (!(@te_overlap)) {#down up # if ($#non_up_down==0) { # $result=$non_up_down[0]; # } # else{#overlap # my $all_2=join "\t",@non_up_down; # $result="up&down1-kb\t".$all_2; # } # } # else{ # $result=join "\t",@non_overlap; # if ($#non_overlap>=1) { # print "$aline\t$result\n"; # } # } # } # else{#TE # if (!(@te_overlap)) {#down up # if ($#te_up_down==0) { # $result=$te_up_down[0]; # } # else{#overlap # my $all_2=join "\t",@te_up_down; # $result="up&down1-kb\t".$all_2; # } # } # else{ # $result=join "\t",@te_overlap; # if ($#te_overlap>=1) { # print "$aline\t$result\n"; # } # } # } # } print OUT "$aline\t$result\n"; } close IN; close OUT; sub usage{ print <<"USAGE"; Version $version Usage: $0 -i -o -g -d options: -i input file -g genelist file -d int the length of the upstream and downstream,default 1000 -o output file -h help USAGE exit(1); }