Mercurial > repos > mcharles > rapsosnp
view rapsodyn/extractseq.pl @ 14:93e6f2af1ce2 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 26 Jan 2015 18:10:52 -0500 |
parents | 0a6c1cfe4dc8 |
children |
line wrap: on
line source
#!/usr/bin/perl #V1.10 manage empty files #V1.02 Trop de pb avec nbci blast+, changment du header des fasta #V1.01 #Ajout d'un _ a la fin du nom pour eviter les problemes avec ncbi blast+ use strict; use warnings; use Getopt::Long; my $input_variant_file; my $input_assembly_file; my $WINDOWS_LENGTH = 50; GetOptions ( "input_variant_file=s" => \$input_variant_file, "input_assembly_file=s" => \$input_assembly_file, "window_length=i" => \$WINDOWS_LENGTH ) or die("Error in command line arguments\n"); open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); if ( -z INV){ print ">empty\nAAAAA"; exit(0); } open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); my @variant_list; ### Retrieving the assembly my %genome; my $current_header=""; my $current_seq=""; while (my $ligne = <INA>){ if ($ligne =~ /^\>(.*?)\s*$/){ if ($current_header){ $genome{$current_header} = $current_seq; } $current_header=$1; $current_seq = ""; } else { if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ $current_seq .= $1; } else { print STDERR "Erreur Parsing n°2\n$ligne\n"; } } } #TRAITEMENT DU DERNIER if ($current_header){ $genome{$current_header} = $current_seq; undef($current_seq); } close (INA); ### Retrieving the variant while (my $ligne=<INV>){ if ($ligne !~ /^\s*$/){ my %variant; my @fields = split (/\s+/,$ligne); $variant{"ref"}=$fields[0]; $variant{"position"}=$fields[1]; $variant{"baseref"}=$fields[2]; $variant{"depth"}=$fields[3]; $variant{"pileup"}=$fields[4]; my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1); my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}})); my $length = $stop-$start+1; #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n"; $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length; my $pileup = $variant{"pileup"}; $pileup =~ s/\$//g; #the read start at this position $pileup =~ s/\^.//g; #the read end at this position my $descriptor = $variant{"position"}."_".$variant{"depth"}."_"; if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){ $descriptor .="I".$1."_".$2; } elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){ $descriptor .="D".$1."_".$2; } elsif ($pileup=~/([ACGTNacgtn])/){ $descriptor.="M1"."_".$1; } else { $descriptor.="?_?"; } $variant{"desc"}=$descriptor; #print ">",$variant{"ref"},"_",$descriptor,"_","\n",$variant{"SEQ"},"\n"; #V1.02 : changement du header #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; push(@variant_list,\%variant); } } close (INV); #*********** sub min{ my $first = shift; my $second = shift; if ($first <= $second){ return $first; } else { return $second; } } sub max { my $first = shift; my $second = shift; if ($first >= $second){ return $first; } else { return $second; } }