Mercurial > repos > dereeper > snmf
comparison Snmf.pl @ 0:9a1729b89405 draft
Uploaded
author | dereeper |
---|---|
date | Tue, 17 May 2016 10:51:34 -0400 |
parents | |
children | 395c7ab476a6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9a1729b89405 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 my $usage = qq~Usage:$0 <args> [<opts>] | |
9 where <args> are: | |
10 -i, --input <input VCF> | |
11 -o, --output <output> | |
12 -k, --kmin <K min> | |
13 -m, --maxK <K max> | |
14 -d, --directory <temporary directory> | |
15 -t, --threshold <threshold admixture proportion for group assignation> | |
16 ~; | |
17 $usage .= "\n"; | |
18 | |
19 my ($input,$output,$kmin,$kmax,$directory,$threshold); | |
20 | |
21 | |
22 GetOptions( | |
23 "input=s" => \$input, | |
24 "output=s" => \$output, | |
25 "kmin=s" => \$kmin, | |
26 "maxK=s" => \$kmax, | |
27 "directory=s" => \$directory, | |
28 "threshold=s" => \$threshold | |
29 ); | |
30 | |
31 | |
32 die $usage | |
33 if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$threshold); | |
34 | |
35 | |
36 my $PLINK_EXE = "plink"; | |
37 | |
38 system("$PLINK_EXE --vcf $input --allow-extra-chr --recode-vcf --out $directory/input >>$directory/plink.log 2>&1"); | |
39 | |
40 system("vcf2geno $directory/input.vcf $directory/polymorphisms.geno >>$directory/vcf2geno.log 2>&1"); | |
41 | |
42 | |
43 my $ind_cmd = `grep '#CHROM' $input`; | |
44 chomp($ind_cmd); | |
45 my @individuals = split(/\t/,$ind_cmd);shift @individuals;shift @individuals;shift @individuals;shift @individuals;shift @individuals;shift @individuals;shift @individuals;shift @individuals; | |
46 | |
47 ################################### | |
48 # launch admixture for different K | |
49 ################################### | |
50 my %errors; | |
51 for (my $k = $kmin; $k <= $kmax; $k++) | |
52 { | |
53 system("sNMF -x $directory/polymorphisms.geno -K $k -c >>$directory/log.$k 2>&1"); | |
54 | |
55 open(my $O3,">$directory/out.$k.group"); | |
56 open(my $O2,">$directory/out.$k.final.Q"); | |
57 | |
58 my $ent; | |
59 open(my $LOG,"$directory/log.$k"); | |
60 while(<$LOG>){ | |
61 if (/Cross-Entropy \(masked data\).*(\d+\.\d+)$/){ | |
62 $ent = $1; | |
63 $errors{$ent} = $k; | |
64 } | |
65 } | |
66 close($LOG); | |
67 | |
68 open(E,">>$directory/entropy"); | |
69 print E "K=$k $ent\n"; | |
70 close(E); | |
71 | |
72 print $O2 "Indiv"; | |
73 print $O3 "Indiv;Group\n"; | |
74 for (my $j = 0; $j <$k; $j++){ | |
75 print $O2 " Q$j"; | |
76 } | |
77 print $O2 "\n"; | |
78 | |
79 open(my $O,"$directory/polymorphisms.$k.Q"); | |
80 my %hash_groupes; | |
81 my %hash_indv; | |
82 my %group_of_ind; | |
83 my $i = 0; | |
84 while (<$O>){ | |
85 $i++; | |
86 my $line = $_; | |
87 $line =~s/\n//g; | |
88 $line =~s/\r//g; | |
89 my @infos = split(/\s+/,$line); | |
90 my $group = "admix"; | |
91 my $ind = $individuals[$i]; | |
92 for (my $j = 0; $j <$k; $j++){ | |
93 my $val = $infos[$j]; | |
94 if ($val > 0.5){$group = "Q$j";} | |
95 } | |
96 if ($ind){ | |
97 $hash_indv{$ind} = join(" ",@infos); | |
98 $hash_groupes{$group}{"ind"} .= ",".$ind; | |
99 $group_of_ind{$ind} = $group; | |
100 } | |
101 } | |
102 close($O); | |
103 | |
104 foreach my $group(sort keys(%hash_groupes)){ | |
105 my @inds = split(",",$hash_groupes{$group}{"ind"}); | |
106 foreach my $ind(@inds){ | |
107 if ($ind =~/\w+/){ | |
108 print $O3 "$ind;$group\n"; | |
109 print $O2 $ind." ".$hash_indv{$ind}. "\n"; | |
110 } | |
111 } | |
112 } | |
113 | |
114 system("cat $directory/log.$k >>$directory/logs"); | |
115 system("echo '\n\n====================================\n\n' >>$directory/logs"); | |
116 system("cat $directory/out.$k.final.Q >>$directory/outputs.Q"); | |
117 system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | |
118 } | |
119 | |
120 my @sorted_errors = sort {$a<=>$b} keys(%errors); | |
121 my $best_K = $errors{@sorted_errors[0]}; | |
122 | |
123 | |
124 system("cp -rf $directory/out.$best_K.final.Q $directory/output"); | |
125 | |
126 system("cp -rf $directory/log.$best_K $directory/log"); | |
127 system("cp -rf $directory/out.$best_K.group $directory/groups"); | |
128 | |
129 |