Mercurial > repos > dereeper > sniplay
annotate GetHaplotypesFromPhasedVCF/GetHaplotypesFromPhasedVCF.pl @ 15:31c23d943c29 draft default tip
Uploaded
author | dereeper |
---|---|
date | Tue, 08 Jan 2019 08:47:56 -0500 |
parents | 88748d846a20 |
children |
rev | line source |
---|---|
6 | 1 #!/usr/bin/perl |
2 | |
3 use strict; | |
4 | |
5 my $vcf = $ARGV[0]; | |
6 my $out = $ARGV[1]; | |
7 | |
8 open(O1,">$out.haplo.fas"); | |
9 open(O2,">$out.distinct_haplotypes.txt"); | |
12
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
10 open(O3,">$out.distinct_haplotypes.fa"); |
6 | 11 |
12 my %indiv; | |
13 my %genes; | |
14 my $nb_cols = 0; | |
15 my %phasing; | |
16 open(my $V,$vcf); | |
17 while(<$V>){ | |
18 my $line = $_; | |
19 $line =~s/\n//g; | |
20 $line =~s/\r//g; | |
21 my @infos = split(/\t/,$line); | |
22 if (/#CHROM/){ | |
23 for (my $i = 9; $i <= $#infos; $i++){ | |
24 $indiv{$i} = $infos[$i]; | |
25 } | |
26 } | |
27 else{ | |
28 my $gene = $infos[0]; | |
29 my $ref = $infos[3]; | |
30 my $alt = $infos[4]; | |
31 $nb_cols = $#infos; | |
32 for (my $i = 9; $i <= $#infos; $i++){ | |
33 my ($alleles,$depth) = split(":",$infos[$i]); | |
34 $alleles =~s/0/$ref/g; | |
35 $alleles =~s/1/$alt/g; | |
36 my $ind = $indiv{$i}; | |
37 #if ($alleles =~/\// && $genes{$gene}){next;} | |
38 $phasing{$gene}{$i}.= $alleles ." "; | |
39 } | |
40 $genes{$gene} = 1; | |
41 } | |
42 } | |
43 close($V); | |
44 | |
45 my %haplos; | |
46 my $gene_ok = 0; | |
47 my $gene_shared = 0; | |
48 my $gene_all_shared = 0; | |
49 my $nb_gene = 0; | |
50 my %haplotypes2; | |
51 foreach my $gene(keys(%phasing)) | |
52 { | |
53 my $list = ""; | |
54 my $ok = 0; | |
55 my %haplotypes; | |
56 for (my $i=9;$i <= $nb_cols;$i++){ | |
57 | |
58 my $alleles = $phasing{$gene}{$i}; | |
59 if ($alleles eq "" or $alleles =~ /\./ or $alleles =~/ \w+\//){next;} | |
60 my @snps = split(" ",$alleles); | |
61 if (scalar @snps < 2){next;} | |
62 $alleles =~s/\//\|/g; | |
63 my @al = split(" ",$alleles); | |
64 my $haplo1 = ""; | |
65 my $haplo2 = ""; | |
66 foreach my $a(@al){ | |
67 my ($a1,$a2) = split(/\|/,$a); | |
68 $haplo1.= $a1; | |
69 $haplo2.= $a2; | |
70 } | |
71 $haplotypes{$haplo1}++; | |
72 $haplotypes{$haplo2}++; | |
73 my $ind = $indiv{$i}; | |
74 $haplotypes2{$gene}{$haplo1}.=$ind."_1,"; | |
75 $haplotypes2{$gene}{$haplo2}.=$ind."_2,"; | |
76 $haplos{$gene}{$haplo1}++; | |
77 $haplos{$gene}{$haplo2}++; | |
78 $list .= ">".$gene."_".$ind."_1\n$haplo1\n"; | |
79 $list .= ">".$gene."_".$ind."_2\n$haplo2\n"; | |
80 $ok++; | |
81 } | |
82 #print "$gene $nb_cols $ok\n"; | |
83 #if ($ok > 13 && $found_M1_M2 == 2){ | |
84 if ($ok == ($nb_cols - 8)){ | |
85 $nb_gene++; | |
86 print O1 $list; | |
87 } | |
88 } | |
89 foreach my $gene(sort {$a<=>$b} keys(%haplos)){ | |
90 print O2 "===$gene===\n"; | |
91 my $ref_hash = $haplos{$gene}; | |
92 my %hash = %$ref_hash; | |
93 my $num_haplo = 0; | |
94 foreach my $haplo(keys(%hash)){ | |
95 $num_haplo++; | |
96 my $haplo_name = "haplo".$num_haplo; | |
97 my $nb = $haplos{$gene}{$haplo}; | |
98 my $ind = $haplotypes2{$gene}{$haplo}; | |
99 print O2 $haplo_name.":$nb:".$haplotypes2{$gene}{$haplo}."\n".$haplo."\n"; | |
12
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
100 if ($nb >= 1){ |
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
101 print O3 ">".$haplo_name."|$nb\n"; |
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
102 print O3 $haplo."\n"; |
6 | 103 } |
104 } | |
105 } | |
106 | |
107 close(O1); | |
12
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
108 close(O2); |
88748d846a20
planemo upload commit 11382afe87364aaafb19973470d5066229a6e34f
dereeper
parents:
6
diff
changeset
|
109 close(O3); |
6 | 110 #print scalar keys(%haplos); |
111 |