view genephys/extractgenomicsegment.pl @ 0:448b10ffb095 draft

Uploaded
author mcharles
date Tue, 12 Aug 2014 05:49:33 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl
#V1.10
use strict;


my $inputfile1 = $ARGV[0];
my $inputfile2 = $ARGV[1];
my $WINDOW = $ARGV[2];
my $OFFSET = $ARGV[3];

if (!$WINDOW){$WINDOW = 200000;}
if (!$OFFSET){$OFFSET = 100000;}

open(IF1, $inputfile1)  or die("Can't open $inputfile1\n");
open(IF2, $inputfile2)  or die("Can't open $inputfile2\n");
my $current_annotation="";
my @list_marquer;
my %chr;
my %position;

# print "$inputfile2\n";

while (my $line=<IF1>){
	my @cols = split(/\t/,$line);
	my %current;
	# Number#Map#Name#Chr#Position#GeneAT#FunctionAT
	
	my $Number = $cols[0];
	my $Map = $cols[2];
	my $Name = $cols[7];
	my $Locus = $cols[8];
	my $Chr = $cols[19];
	my $Position = $cols[20];
	$Position =~ s/\s+//g;
	my $GeneAT=$cols[32];
	my $FunctionAT=$cols[37];
	$chr{$Name} = $Chr;
	$position{$Name} = $Position;
	
	### Modification 1.10
	if ($Locus ne $Name){ 
		$chr{$Locus} = $Chr;
		$position{$Locus} = $Position;	
	}
	###
	
	#print "$Number#$Map#$Name#$Chr#$Position#$GeneAT#$FunctionAT\n";
}
close (IF1);

# my @key = keys(%chr);
# for (my $i=0;$i<=$#key;$i++){
	# print $key[$i],"\n";
# }

while (my $line=<IF2>){
	my @cols = split (/\s+/,$line);
	for (my $i=0;$i<=$#cols;$i++){
		my $current = $cols[$i];
		chomp($current);
		if ($current !~ /^\s+$/){
			push(@list_marquer,$current);
		}
	}
}
close (IF2);

my %coord_by_chr;
for (my $i=0;$i<=$#list_marquer;$i++){
	my $current_name = $list_marquer[$i];
	my $current_chr = $chr{$current_name};
	my $current_position = $position{$current_name};
	
	if ($current_position =~ /^\d+$/){
		my @tbl_coord_for_current_chr;
		if ($coord_by_chr{$current_chr}){
			@tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}};
		}
		push(@tbl_coord_for_current_chr,$current_position);
		$coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr;
	}
	elsif (($current_position eq "-")||($current_position =~/none/i)){
		
	}
	else {
		chomp($current_position);
		#$current_position =~ s/\s+//g;
		print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n";
		print $list_marquer[$i],"\n";
		#exit(0);
	}
}

# foreach my $key (keys %coord_by_chr){
	# my @tbl_coord = @{$coord_by_chr{$key}};
	# print "\n$key\n";
	# @tbl_coord = sort { $a <=> $b } @tbl_coord;
	# for (my $i=0;$i<=$#tbl_coord;$i++){
		# print $tbl_coord[$i],"\n";
	# }
# }

foreach my $key (sort keys %coord_by_chr){
	my @tbl_coord = @{$coord_by_chr{$key}};
	# print "TEST : $key\n";
	@tbl_coord = sort { $a <=> $b } @tbl_coord;
	my $current_start;
	my $current_stop;
	my $current_start_offset;
	my $current_stop_offset;
	
	
	for (my $i=0;$i<=$#tbl_coord;$i++){
		if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]}
		
		# print "$i : $current_start / $current_stop\n";
		if ($tbl_coord[$i]>$current_stop+$WINDOW){
			#OFFSET
			if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
			$current_stop_offset = $current_stop + $OFFSET;
			#######
			print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
			
			$current_start = $tbl_coord[$i];
			$current_stop = $tbl_coord[$i];

			if ($i==$#tbl_coord){				
				#OFFSET
				if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
				$current_stop_offset = $current_stop + $OFFSET;
				#######
				print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
			}
		}
		else {
			$current_stop=$tbl_coord[$i];
			if ($i==$#tbl_coord){
				#OFFSET
				if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
				$current_stop_offset = $current_stop + $OFFSET;
				#######
				print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
			}
		}
	}
}
#Traitement du dernier

# if ($#tbl_coord == 0){
	# print $key,":",$tbl_coord[$i],"\n";
# }
# else {
	# if ($i==0){
		# push (@current_table,$tbl_coord[$i]);
	# }
	# else {
		# if ($tbl_coord[$i]>$current_table[$#current_table]+$WINDOW){
			# print $key,":",$current_table[0],":",$current_table[$#current_table],"\n";
			# undef @current_table;
			# push (@current_table,$tbl_coord[$i]);
		# }
		# else {
			# push (@current_table,$tbl_coord[$i]);
		# }
	# }
# }


# print "\n";
# foreach my $key (keys %coord_by_chr){
	# print "\n$key\n";
	# @tbl_coord = sort { $a <=> $b } @tbl_coord;
	# for (my $i=0;$i<=$#tbl_coord;$i++){
		# print $tbl_coord[$i],"\n";
	# }
# }