Mercurial > repos > mcharles > genephys
view genephys/extractgenomicsegment.pl @ 0:448b10ffb095 draft
Uploaded
author | mcharles |
---|---|
date | Tue, 12 Aug 2014 05:49:33 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl #V1.10 use strict; my $inputfile1 = $ARGV[0]; my $inputfile2 = $ARGV[1]; my $WINDOW = $ARGV[2]; my $OFFSET = $ARGV[3]; if (!$WINDOW){$WINDOW = 200000;} if (!$OFFSET){$OFFSET = 100000;} open(IF1, $inputfile1) or die("Can't open $inputfile1\n"); open(IF2, $inputfile2) or die("Can't open $inputfile2\n"); my $current_annotation=""; my @list_marquer; my %chr; my %position; # print "$inputfile2\n"; while (my $line=<IF1>){ my @cols = split(/\t/,$line); my %current; # Number#Map#Name#Chr#Position#GeneAT#FunctionAT my $Number = $cols[0]; my $Map = $cols[2]; my $Name = $cols[7]; my $Locus = $cols[8]; my $Chr = $cols[19]; my $Position = $cols[20]; $Position =~ s/\s+//g; my $GeneAT=$cols[32]; my $FunctionAT=$cols[37]; $chr{$Name} = $Chr; $position{$Name} = $Position; ### Modification 1.10 if ($Locus ne $Name){ $chr{$Locus} = $Chr; $position{$Locus} = $Position; } ### #print "$Number#$Map#$Name#$Chr#$Position#$GeneAT#$FunctionAT\n"; } close (IF1); # my @key = keys(%chr); # for (my $i=0;$i<=$#key;$i++){ # print $key[$i],"\n"; # } while (my $line=<IF2>){ my @cols = split (/\s+/,$line); for (my $i=0;$i<=$#cols;$i++){ my $current = $cols[$i]; chomp($current); if ($current !~ /^\s+$/){ push(@list_marquer,$current); } } } close (IF2); my %coord_by_chr; for (my $i=0;$i<=$#list_marquer;$i++){ my $current_name = $list_marquer[$i]; my $current_chr = $chr{$current_name}; my $current_position = $position{$current_name}; if ($current_position =~ /^\d+$/){ my @tbl_coord_for_current_chr; if ($coord_by_chr{$current_chr}){ @tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}}; } push(@tbl_coord_for_current_chr,$current_position); $coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr; } elsif (($current_position eq "-")||($current_position =~/none/i)){ } else { chomp($current_position); #$current_position =~ s/\s+//g; print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n"; print $list_marquer[$i],"\n"; #exit(0); } } # foreach my $key (keys %coord_by_chr){ # my @tbl_coord = @{$coord_by_chr{$key}}; # print "\n$key\n"; # @tbl_coord = sort { $a <=> $b } @tbl_coord; # for (my $i=0;$i<=$#tbl_coord;$i++){ # print $tbl_coord[$i],"\n"; # } # } foreach my $key (sort keys %coord_by_chr){ my @tbl_coord = @{$coord_by_chr{$key}}; # print "TEST : $key\n"; @tbl_coord = sort { $a <=> $b } @tbl_coord; my $current_start; my $current_stop; my $current_start_offset; my $current_stop_offset; for (my $i=0;$i<=$#tbl_coord;$i++){ if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]} # print "$i : $current_start / $current_stop\n"; if ($tbl_coord[$i]>$current_stop+$WINDOW){ #OFFSET if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} $current_stop_offset = $current_stop + $OFFSET; ####### print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; $current_start = $tbl_coord[$i]; $current_stop = $tbl_coord[$i]; if ($i==$#tbl_coord){ #OFFSET if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} $current_stop_offset = $current_stop + $OFFSET; ####### print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; } } else { $current_stop=$tbl_coord[$i]; if ($i==$#tbl_coord){ #OFFSET if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} $current_stop_offset = $current_stop + $OFFSET; ####### print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; } } } } #Traitement du dernier # if ($#tbl_coord == 0){ # print $key,":",$tbl_coord[$i],"\n"; # } # else { # if ($i==0){ # push (@current_table,$tbl_coord[$i]); # } # else { # if ($tbl_coord[$i]>$current_table[$#current_table]+$WINDOW){ # print $key,":",$current_table[0],":",$current_table[$#current_table],"\n"; # undef @current_table; # push (@current_table,$tbl_coord[$i]); # } # else { # push (@current_table,$tbl_coord[$i]); # } # } # } # print "\n"; # foreach my $key (keys %coord_by_chr){ # print "\n$key\n"; # @tbl_coord = sort { $a <=> $b } @tbl_coord; # for (my $i=0;$i<=$#tbl_coord;$i++){ # print $tbl_coord[$i],"\n"; # } # }