annotate rapsodyn/extractseq.pl @ 1:7f36bd129321 draft

Uploaded
author mcharles
date Wed, 10 Sep 2014 10:24:01 -0400
parents 442a7c88b886
children 1776b8ddd87e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
2 #V1.10
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
3
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
4 use strict;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
5 use warnings;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
6 use Getopt::Long;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
7
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
8 my $input_variant_file;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
9 my $input_assembly_file;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
10 my $WINDOWS_LENGTH = 50;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
11
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
12 GetOptions (
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
13 "input_variant_file=s" => \$input_variant_file,
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
14 "input_assembly_file=s" => \$input_assembly_file,
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
15 "window_length=i" => \$WINDOWS_LENGTH
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
16 ) or die("Error in command line arguments\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
17
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
18 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
19 open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
20
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
21 my @variant_list;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
22
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
23
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
24 ### Retrieving the assembly
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
25 my %genome;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
26
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
27 my $current_header="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
28 my $current_seq="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
29 while (my $ligne = <INA>){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
30 if ($ligne =~ /^\>(.*?)\s*$/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
31 if ($current_header){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
32 $genome{$current_header} = $current_seq;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
33 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
34 $current_header=$1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
35 $current_seq = "";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
36 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
37 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
38 if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
39 $current_seq .= $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
40 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
41 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
42 print STDERR "Erreur Parsing n°2\n$ligne\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
43 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
44 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
45 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
46 #TRAITEMENT DU DERNIER
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
47 if ($current_header){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
48 $genome{$current_header} = $current_seq;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
49 undef($current_seq);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
50 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
51 close (INA);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
52
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
53
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
54 ### Retrieving the variant
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
55 while (my $ligne=<INV>){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
56 if ($ligne !~ /^\s*$/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
57 my %variant;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
58 my @fields = split (/\s+/,$ligne);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
59 $variant{"ref"}=$fields[0];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
60 $variant{"position"}=$fields[1];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
61 $variant{"baseref"}=$fields[2];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
62 $variant{"depth"}=$fields[3];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
63 $variant{"pileup"}=$fields[4];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
64
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
65
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
66 my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
67 my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}}));
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
68 my $length = $stop-$start+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
69
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
70 #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
71
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
72 $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
73
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
74 my $pileup = $variant{"pileup"};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
75 $pileup =~ s/\$//g; #the read start at this position
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
76 $pileup =~ s/\^.//g; #the read end at this position
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
77 my $descriptor = $variant{"position"}."_".$variant{"depth"}."_";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
78 if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
79 $descriptor .="I".$1."_".$2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
80 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
81 elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
82 $descriptor .="D".$1."_".$2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
83 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
84 elsif ($pileup=~/([ACGTNacgtn])/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
85 $descriptor.="M1"."_".$1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
86 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
87 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
88 $descriptor.="?_?";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
89 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
90 $variant{"desc"}=$descriptor;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
91
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
92 print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
93
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
94
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
95
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
96 #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
97
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
98 push(@variant_list,\%variant);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
99 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
100 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
101 close (INV);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
102
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
103
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
104
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
105
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
106
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
107
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
108
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
109
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
110
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
111 #***********
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
112 sub min{
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
113 my $first = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
114 my $second = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
115 if ($first <= $second){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
116 return $first;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
117 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
118 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
119 return $second;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
120 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
121 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
122
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
123 sub max {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
124 my $first = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
125 my $second = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
126 if ($first >= $second){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
127 return $first;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
128 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
129 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
130 return $second;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
131 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
132 }