| 0 | 1 #!/usr/bin/perl | 
| 10 | 2 #V1.10 manage empty files | 
|  | 3 #V1.02 Trop de pb avec nbci blast+, changment du header des fasta | 
| 7 | 4 #V1.01 #Ajout d'un _ a la fin du nom pour eviter les problemes avec ncbi blast+ | 
| 0 | 5 | 
|  | 6 use strict; | 
|  | 7 use warnings; | 
|  | 8 use Getopt::Long; | 
|  | 9 | 
|  | 10 my $input_variant_file; | 
|  | 11 my $input_assembly_file; | 
|  | 12 my $WINDOWS_LENGTH = 50; | 
|  | 13 | 
|  | 14 GetOptions ( | 
|  | 15 "input_variant_file=s" => \$input_variant_file, | 
|  | 16 "input_assembly_file=s" => \$input_assembly_file, | 
|  | 17 "window_length=i" => \$WINDOWS_LENGTH | 
|  | 18 ) or die("Error in command line arguments\n"); | 
|  | 19 | 
|  | 20 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); | 
| 10 | 21 | 
|  | 22 if ( -z INV){ | 
|  | 23 	print ">empty\nAAAAA"; | 
|  | 24 	exit(0); | 
|  | 25 } | 
|  | 26 | 
| 0 | 27 open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); | 
|  | 28 | 
|  | 29 my @variant_list; | 
|  | 30 | 
|  | 31 | 
|  | 32 ### Retrieving the assembly | 
|  | 33 my %genome; | 
|  | 34 | 
|  | 35 my $current_header=""; | 
|  | 36 my $current_seq=""; | 
|  | 37 while (my $ligne = <INA>){ | 
|  | 38 	if ($ligne =~ /^\>(.*?)\s*$/){ | 
|  | 39 		if ($current_header){ | 
|  | 40 			$genome{$current_header} = $current_seq; | 
|  | 41 		} | 
|  | 42 		$current_header=$1; | 
|  | 43 		$current_seq = ""; | 
|  | 44 	} | 
|  | 45 	else { | 
|  | 46 		if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ | 
|  | 47 			$current_seq .= $1; | 
|  | 48 		} | 
|  | 49 		else { | 
|  | 50 			print STDERR "Erreur Parsing n°2\n$ligne\n"; | 
|  | 51 		} | 
|  | 52 	} | 
|  | 53 } | 
|  | 54 #TRAITEMENT DU DERNIER | 
|  | 55 if ($current_header){ | 
|  | 56 	$genome{$current_header} = $current_seq; | 
|  | 57 	undef($current_seq); | 
|  | 58 } | 
|  | 59 close (INA); | 
|  | 60 | 
|  | 61 | 
|  | 62 ### Retrieving the variant | 
|  | 63 while (my $ligne=<INV>){ | 
|  | 64 	if ($ligne !~ /^\s*$/){ | 
|  | 65 		my %variant; | 
|  | 66 		my @fields = split (/\s+/,$ligne); | 
|  | 67 		$variant{"ref"}=$fields[0]; | 
|  | 68 		$variant{"position"}=$fields[1]; | 
|  | 69 		$variant{"baseref"}=$fields[2]; | 
|  | 70 		$variant{"depth"}=$fields[3]; | 
|  | 71 		$variant{"pileup"}=$fields[4]; | 
|  | 72 | 
|  | 73 | 
|  | 74 		my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1); | 
|  | 75 		my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}})); | 
|  | 76 		my $length = $stop-$start+1; | 
|  | 77 | 
|  | 78 		#print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n"; | 
|  | 79 | 
|  | 80 		$variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length; | 
|  | 81 | 
|  | 82 		my $pileup = $variant{"pileup"}; | 
|  | 83 		$pileup =~ s/\$//g; #the read start at this position | 
|  | 84 		$pileup =~ s/\^.//g; #the read end at this position | 
|  | 85 		my $descriptor = $variant{"position"}."_".$variant{"depth"}."_"; | 
|  | 86 		if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){ | 
|  | 87 			$descriptor .="I".$1."_".$2; | 
|  | 88 		} | 
|  | 89 		elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){ | 
|  | 90 			$descriptor .="D".$1."_".$2; | 
|  | 91 		} | 
|  | 92 		elsif ($pileup=~/([ACGTNacgtn])/){ | 
|  | 93 			$descriptor.="M1"."_".$1; | 
|  | 94 		} | 
|  | 95 		else { | 
|  | 96 			$descriptor.="?_?"; | 
|  | 97 		} | 
|  | 98 		$variant{"desc"}=$descriptor; | 
|  | 99 | 
| 10 | 100 		#print ">",$variant{"ref"},"_",$descriptor,"_","\n",$variant{"SEQ"},"\n"; | 
| 0 | 101 | 
|  | 102 | 
|  | 103 | 
| 10 | 104 		#V1.02 : changement du header | 
| 0 | 105 		#print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; | 
| 10 | 106 		print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; | 
| 0 | 107 | 
|  | 108 		push(@variant_list,\%variant); | 
|  | 109 	} | 
|  | 110 } | 
|  | 111 close (INV); | 
|  | 112 | 
|  | 113 | 
|  | 114 | 
|  | 115 | 
|  | 116 | 
|  | 117 | 
|  | 118 | 
|  | 119 | 
|  | 120 | 
|  | 121 #*********** | 
|  | 122 sub min{ | 
|  | 123 	my $first = shift; | 
|  | 124 	my $second = shift; | 
|  | 125 	if ($first <= $second){ | 
|  | 126 		return $first; | 
|  | 127 	} | 
|  | 128 	else { | 
|  | 129 		return $second; | 
|  | 130 	} | 
|  | 131 } | 
|  | 132 | 
|  | 133 sub max { | 
|  | 134 	my $first = shift; | 
|  | 135 	my $second = shift; | 
|  | 136 	if ($first >= $second){ | 
|  | 137 		return $first; | 
|  | 138 	} | 
|  | 139 	else { | 
|  | 140 		return $second; | 
|  | 141 	} | 
| 6 | 142 } |