annotate VCF2Hapmap/VCF2FastaAndHapmap.pl @ 13:734a3572c1d6 draft

Uploaded
author dereeper
date Tue, 08 Jan 2019 08:45:34 -0500
parents 420b57c3c185
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 #!/usr/bin/perl
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 use strict;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 my $usage = qq~Usage:$0 <args> [<opts>]
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 where <args> are:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 -v, --vcf <VCF input>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 -o, --out <Output basename>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 <opts> are:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 -r, --reference <Reference fasta file>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 -g, --gff <GFF input file to create alignments of genes>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 ~;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19 $usage .= "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 my ($input,$out,$reference,$gff);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 GetOptions(
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26 "vcf=s" => \$input,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 "out=s" => \$out,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 "reference=s" => \$reference,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 "gff=s" => \$gff
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 die $usage
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 if ( !$input || !$out);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 if ($gff && !$reference)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 die "You must provide a Fasta reference file when providing GFF annotation\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 my %ref_sequences;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 if ($reference)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 my $id;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 my $sequence = "";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 open(my $R,$reference) or die "cannot open file: $reference";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 while(<$R>)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 my $line =$_;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 $line =~s/\n//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 $line =~s/\r//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 if ($line =~ />([^\s]+)/){
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 $ref_sequences{$id} = $sequence;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 $id=$1;$sequence="";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 $sequence .= $line;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 close($R);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 $ref_sequences{$id} = $sequence;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 my %chr_of_gene;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 my %ann;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 if ($gff)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 open(my $G,$gff) or die "cannot open file: $gff";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 while(<$G>)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 my $line =$_;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 $line =~s/\n//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 $line =~s/\r//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 my @i = split(/\t/,$line);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 my $chr = $i[0];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 my $feature = $i[2];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 my $strand = $i[6];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 my $start = $i[3];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 my $stop = $i[4];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 my $inf = $i[8];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 if ($feature eq 'gene')
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 if ($inf =~/Name=([\w\-\.]+)[;\s]*/){$inf = $1;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 $ann{$inf}{"start"}=$start;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 $ann{$inf}{"stop"}=$stop;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 $ann{$inf}{"strand"}=$strand;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 $chr_of_gene{$inf} = $chr;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 close($G);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 my %IUPAC =
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 (
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100 '[A/G]'=> "R",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 '[G/A]'=> "R",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 '[C/T]'=> "Y",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 '[T/C]'=> "Y",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 '[T/G]'=> "K",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 '[G/T]'=> "K",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 '[C/G]'=> "S",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 '[G/C]'=> "S",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 '[A/T]'=> "W",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 '[T/A]'=> "W",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 '[A/C]'=> "M",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 '[C/A]'=> "M",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 '[C/A/T]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 '[A/T/C]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 '[A/C/T]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 '[C/T/A]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 '[T/C/A]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 '[T/A/C]'=> "H",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 '[C/A/G]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 '[A/G/C]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 '[A/C/G]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 '[C/G/A]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 '[G/C/A]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 '[G/A/C]'=> "V",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 '[C/T/G]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125 '[T/G/C]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 '[T/C/G]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 '[C/G/T]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 '[G/C/T]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 '[G/T/C]'=> "B",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 '[T/A/G]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 '[A/G/T]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 '[A/T/G]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 '[T/G/A]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 '[G/T/A]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 '[G/A/T]'=> "D",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 my %snps_of_gene;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 my %snps_of_gene2;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 my %indiv_order;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 my $indiv_list;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 my %genotyping_infos;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 my $num_line = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 my $genename_rank_in_snpeff = 4;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 my $find_annotations = `grep -c 'EFF=' $input`;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 open(my $HAPMAP,">$out.hapmap");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149 print $HAPMAP "rs# alleles chrom pos gene feature effect codon_change amino_acid_change MAF missing_data";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 open(my $VCF,$input);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 while(<$VCF>)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153 my $line = $_;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154 chomp($line);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155 my @infos = split(/\t/,$line);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 if (/^##INFO=\<ID=EFF/ && /Amino_Acid_length \| Gene_Name \| Transcript_BioType \| Gene_Coding/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 $genename_rank_in_snpeff = 8;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162 if (scalar @infos > 9)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164 if (/#CHROM/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 for (my $j=9;$j<=$#infos;$j++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168 my $individu = $infos[$j];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 $indiv_list .= " $individu";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170 $indiv_order{$j} = $individu;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 print $HAPMAP "$indiv_list\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174 elsif (!/^#/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176 $num_line++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 my $chromosome = $infos[0];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179 my $chromosome_position = $infos[1];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180 my $ref_allele = $infos[3];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 my $alt_allele = $infos[4];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183 if ($ref_allele =~/\w\w+/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185 $ref_allele = "A";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186 $alt_allele = "T";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188 elsif ($alt_allele =~/\w\w+/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 $ref_allele = "T";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191 $alt_allele = "A";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194 my $info = $infos[7];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195 my $is_in_exon = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 my $is_synonyme = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197 my $gene;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198 if ($find_annotations > 1)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
200 $gene = "intergenic";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
201 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
202 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
203 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
204 $gene = $chromosome;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
205 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
206 my $modif_codon = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
207 my $modif_aa = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
208 my $geneposition;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
209 if ($info =~/EFF=(.*)/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
210 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
211 my @annotations = split(",",$1);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
212 foreach my $annotation(@annotations)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
213 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
214 my ($syn, $additional) = split(/\(/,$annotation);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
215
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
216 if ($syn =~/STREAM/){next;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
217
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
218 $is_in_exon = "exon";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
219 if ($syn =~/UTR/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
220 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
221 $is_in_exon = $syn;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
222 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
223 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
224 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
225 $is_synonyme = $syn;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
226 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
227 my @infos_additional = split(/\|/,$additional);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
228 $gene = $infos_additional[$genename_rank_in_snpeff];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
229 $modif_codon = $infos_additional[2];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
230 $modif_aa = $infos_additional[3];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
231
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
232 if ($syn =~/INTERGENIC/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
233 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
234 $is_synonyme = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
235 $gene = "intergenic";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
236 $is_in_exon = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
237 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
238 elsif ($syn =~/SYNONYM/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
239 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
240 $is_in_exon = "exon";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
241 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
242 elsif ($syn =~/INTRON/ or $syn =~/SPLICE_SITE_DONOR/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
243 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
244 $is_in_exon = "intron";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
245 $is_synonyme = "#";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
246 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
247
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
248
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
249 if ($modif_aa =~/(\w)(\d+)$/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
250 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
251 $modif_aa = "$1/$1";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
252 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
253 elsif ($modif_aa =~/(\w)(\d+)(\w)/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
254 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
255 $modif_aa = "$1/$3";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
256 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
257 if ($infos_additional[8] =~/Exon/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
258 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
259 $is_in_exon = "exon";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
260 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
261
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
262 if (!$modif_aa){$modif_aa="#";}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
263 if (!$modif_codon){$modif_codon="#";}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
264 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
265 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
266 $gene =~s/\.\d//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
267 if ($ann{$gene}{"start"})
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
268 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
269 my $strand = $ann{$gene}{"strand"};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
270 if ($strand eq '-')
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
271 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
272 $geneposition = $ann{$gene}{"stop"} - $chromosome_position;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
273 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
274 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
275 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
276 $geneposition = $chromosome_position - $ann{$gene}{"start"};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
277 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
278 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
279 #if ($info =~/GenePos=(\d+);/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
280 #{
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
281 # $geneposition = $1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
282 #}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
283 my $ratio_missing_data;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
284 my $snp_frequency;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
285 my $genotyping = "";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
286
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
287 if (2 > 1)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
288 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
289
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
290 $genotyping_infos{"ref"} = "$ref_allele$ref_allele";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
291
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
292 my %alleles_found;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
293 my $nb_readable_ind = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
294 for (my $i = 9;$i <= $#infos;$i++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
295 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
296 my $dnasample = $indiv_order{$i};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
297 my @infos_alleles = split(":",$infos[$i]);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
298 my $genotype = $infos_alleles[0];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
299 $genotype =~s/0/$ref_allele/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
300
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
301 if ($alt_allele =~/,/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
302 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
303 my @alt_alleles = split(",",$alt_allele);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
304 my $num_all = 1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
305 foreach my $alt_al(@alt_alleles)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
306 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
307 $genotype =~s/$num_all/$alt_al/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
308 $num_all++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
309 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
310 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
311 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
312 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
313 $genotype =~s/1/$alt_allele/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
314 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
315 if ($genotype eq '.'){$genotype = "./.";}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
316 $genotype =~s/\./N/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
317 if ($genotype !~/N\/N/)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
318 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
319 $nb_readable_ind++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
320 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
321 my @alleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
322 if ($genotype =~/\//)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
323 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
324 @alleles = split(/\//,$genotype);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
325 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
326 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
327 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
328 @alleles = split(/\|/,$genotype);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
329 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
330 $genotyping .= join("",@alleles) . " ";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
331 $genotyping_infos{$dnasample} = join("",@alleles);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
332
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
333 foreach my $al(@alleles)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
334 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
335 if ($al ne 'N'){$alleles_found{$al}++;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
336 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
337 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
338 chop($genotyping);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
339
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
340 $snp_frequency = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
341
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
342 my $max = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
343 my $min = 10000000;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
344 my $total = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
345 foreach my $al(keys(%alleles_found))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
346 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
347 my $nb = $alleles_found{$al};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
348 $total+= $nb;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
349 if ($nb > $max)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
350 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
351 $max = $nb;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
352 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
353 if ($nb < $min)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
354 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
355 $min = $nb;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
356 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
357 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
358 if ($total > 0)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
359 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
360 $snp_frequency = sprintf("%.1f",($min/$total)*100);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
361 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
362
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
363 $ratio_missing_data = 100 - ($nb_readable_ind / ($#infos - 8)) * 100;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
364 $ratio_missing_data = sprintf("%.1f",$ratio_missing_data);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
365
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
366 foreach my $dna(keys(%genotyping_infos))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
367 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
368 $snps_of_gene{$gene}{$geneposition}{$dna} = $genotyping_infos{$dna};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
369 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
370 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
371 my $snp_type = "[$ref_allele/$alt_allele]";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
372 $snps_of_gene2{$chromosome}{$chromosome_position} = $snp_type;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
373 #print $HAPMAP "$chromosome:$chromosome_position\t$snp_type\t$chromosome\t$chromosome_position\t$gene:$geneposition\t$is_in_exon\t$is_synonyme\t$modif_codon\t$modif_aa\t$snp_frequency%\t$nb_readable_ind\t$genotyping\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
374 print $HAPMAP "$chromosome:$chromosome_position\t$ref_allele/$alt_allele\t$chromosome\t$chromosome_position\t$gene:$geneposition\t$is_in_exon\t$is_synonyme\t$modif_codon\t$modif_aa\t$snp_frequency%\t$ratio_missing_data%\t$genotyping\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
375 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
376 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
377 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
378 close($VCF);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
379 close($HAPMAP);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
380
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
381
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
382 if (!$reference){exit;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
383
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
384 #################################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
385 # generate flanking sequences for Illumina VeraCode technology
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
386 #################################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
387 open(my $FLANKING,">$out.flanking.txt");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
388 foreach my $seq(keys(%ref_sequences))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
389 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
390 if ($snps_of_gene2{$seq})
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
391 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
392 my $refhash = $snps_of_gene2{$seq};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
393 my %hashreal = %$refhash;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
394
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
395 # create consensus
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
396 my $refseq = $ref_sequences{$seq};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
397 my $consensus = "";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
398 my $previous = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
399 foreach my $pos(sort {$a<=>$b} keys(%hashreal))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
400 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
401 my $length = $pos - $previous - 1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
402 $consensus .= substr($refseq,$previous,$length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
403 my $iupac_code = $IUPAC{$snps_of_gene2{$seq}{$pos}};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
404 $consensus .= $iupac_code;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
405 $previous = $pos;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
406 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
407 my $length = length($refseq) - $previous;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
408 $consensus .= substr($refseq,$previous,$length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
409
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
410 foreach my $pos(sort {$a<=>$b} keys(%hashreal))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
411 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
412 my $snp_name = "$seq-$pos";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
413 my $flanking_length = 60;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
414 my $length = $flanking_length;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
415 my $start = $pos - $flanking_length - 1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
416 if ($pos <= $flanking_length)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
417 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
418 $length = $pos - 1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
419 $start = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
420 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
421 my $sequence = substr($consensus,$start,$length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
422
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
423 $sequence .= $snps_of_gene2{$seq}{$pos};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
424 $sequence .= substr($consensus,$pos,$flanking_length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
425 print $FLANKING "$snp_name,$sequence,0,0,0,Project_name,0,diploid,Other,Forward\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
426 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
427 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
428 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
429
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
430 close($FLANKING);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
431
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
432 if (!$gff){exit;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
433
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
434 my @individuals_list = split(/\t/,$indiv_list);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
435 if ((scalar @individuals_list * scalar keys(%snps_of_gene)) > 800000)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
436 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
437 print "Sorry, too many sequences to manage ...\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
438 exit;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
439 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
440
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
441 open(my $ALIGN_EGGLIB,">$out.gene_alignment.fas");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
442 my %alignments_ind;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
443 foreach my $seq(keys(%snps_of_gene))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
444 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
445 if ($snps_of_gene{$seq})
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
446 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
447 my $refhash = $snps_of_gene{$seq};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
448 my %hashreal = %$refhash;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
449
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
450 # get flanking sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
451 my %flanking5;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
452 my $start = $ann{$seq}{"start"};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
453 my $stop = $ann{$seq}{"stop"};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
454 my $strand = $ann{$seq}{"strand"};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
455 my $genelength = $stop - $start+1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
456 my $chr = $chr_of_gene{$seq};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
457 my $refseq = substr($ref_sequences{$chr},$start-1,$genelength);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
458 if ($strand eq '-')
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
459 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
460 $refseq =~ tr /atcgATCG/tagcTAGC/; $refseq = reverse($refseq);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
461 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
462 #print "$seq $chr $start $stop $refseq \n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
463 my $previous = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
464 foreach my $pos(sort {$a<=>$b} keys(%hashreal))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
465 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
466 my $length = $pos - $previous - 1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
467 $flanking5{$pos} = substr($refseq,$previous,$length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
468 $previous = $pos;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
469 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
470 my $length = length($refseq) - $previous;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
471 my $flanking3 = substr($refseq,$previous,$length);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
472 foreach my $ind(@individuals_list)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
473 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
474 my $nb_missing_data_for_this_individual = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
475 if ($ind)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
476 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
477 my $alignment_for_ind = "";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
478 my $seq_without_underscore = $seq;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
479 $seq_without_underscore =~s/_//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
480 $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . "_1\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
481 foreach my $pos(sort {$a<=>$b} keys(%hashreal))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
482 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
483 $alignment_for_ind .= $flanking5{$pos};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
484 my $geno = $snps_of_gene{$seq}{$pos}{$ind};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
485 $geno =~s/N/?/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
486 if ($geno =~/\?/){$nb_missing_data_for_this_individual++;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
487 my @alleles = split("",$geno);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
488 $alignment_for_ind .= $alleles[0];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
489 if ($alleles[0] eq $alleles[1])
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
490 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
491 $alignments_ind{$ind} .= $alleles[1];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
492 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
493 else
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
494 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
495 my $snp_type = "[" . $alleles[0] . "/" . $alleles[1] . "]";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
496 $alignments_ind{$ind} .= $IUPAC{$snp_type};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
497 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
498 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
499 $alignment_for_ind .= $flanking3;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
500 $alignment_for_ind .= "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
501
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
502
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
503 $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . "_2\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
504 foreach my $pos(sort {$a<=>$b} keys(%hashreal))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
505 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
506 $alignment_for_ind .= $flanking5{$pos};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
507 my $geno = $snps_of_gene{$seq}{$pos}{$ind};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
508 $geno =~s/N/?/g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
509 my @alleles = split("",$geno);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
510 $alignment_for_ind .= $alleles[1];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
511 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
512 $alignment_for_ind .= $flanking3;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
513 $alignment_for_ind .= "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
514 if (keys(%hashreal) != $nb_missing_data_for_this_individual)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
515 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
516 print $ALIGN_EGGLIB $alignment_for_ind;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
517 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
518 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
519 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
520 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
521 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
522 close($ALIGN_EGGLIB);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
523
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
524
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
525