view genephys/extractgenomicsequencefromsegment.pl @ 0:448b10ffb095 draft

Uploaded
author mcharles
date Tue, 12 Aug 2014 05:49:33 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl
#V1.10
my $inputsegment = $ARGV[0];
my $inputfasta = $ARGV[1];

open(IS, $inputsegment) or die ("Can't open $inputsegment\n");
open(IF, $inputfasta) or die ("Can't open $inputfasta\n");


my @header;
my @start;
my @end;
my @segment_header;

while (my $ligne = <IS>){
	if ($ligne=~/(.*?):(\d+)\.+(\d+)/){
		push (@header,$1);
		push (@start,$2);
		push (@end,$3);
		push (@segment_header,$1.":".$2."..".$3);
	}
}

close (IS);

#print "TEST : $#header\n";

my %genome;

my $current_header;
my $current_seq="";
while (my $ligne = <IF>){
	if ($ligne =~ /^\>(.*?)\s*$/){
		if ($current_header){
			$genome{$current_header} = $current_seq;
		}
		
		# my $length = length($current_seq);
		# print "TEST : $current_header\t$length\n";
		# print "TEST : $current_header\n";
		$current_header=$1;
		$current_seq = "";
		$current_position=0;
	}
	else {
		if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
			$current_seq .= $1;
		}
		else {
			print STDERR "Erreur Parsing n°1\n$ligne\n";
		}
	}
}

#TRAITEMENT DU DERNIER
if ($current_header){
	$genome{$current_header} = $current_seq;
	undef($current_seq);
}

# foreach my $key (keys %genome){
	# print $key,"\t",length($genome{$key}),"\n";
# }

for (my $i=0;$i<=$#header;$i++){
	my $compt=0;
	my $current_seq="";
	print ">",$header[$i],":",$start[$i],"..",$end[$i],"\n";
	### Modification 1.10
	if ($end[$i]>length($genome{$header[$i]})){
		$end[$i] = length($genome{$header[$i]});
	}
	###

	my @SEQ = split(//,$genome{$header[$i]});
	for (my $coord = $start[$i]-1; $coord<=$end[$i]-1;$coord++){
		$compt++;
		# print "TEST : $compt\n";
		if ($compt > 60 ){
			$current_seq .= "\n";
			$compt=1;
		}
		$current_seq .= $SEQ[$coord];
		
	}
	print "$current_seq\n";
}

close (IF);