Mercurial > repos > dereeper > admixture
comparison Admixture.pl @ 4:58df6910f1c3 draft
planemo upload
author | dereeper |
---|---|
date | Tue, 12 Apr 2016 09:31:56 -0400 |
parents | |
children | 97c9c8daa3c3 |
comparison
equal
deleted
inserted
replaced
3:9f6977aae93a | 4:58df6910f1c3 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Getopt::Long; | |
5 use File::Basename; | |
6 | |
7 my $usage = qq~Usage:$0 <args> [<opts>] | |
8 where <args> are: | |
9 -i, --input <input basename (for PED, FAM, BIM)> | |
10 -o, --output <output> | |
11 -k, --kmin <K min. int> | |
12 -m, --maxK <K max. int> | |
13 -d, --directory <temporary directory> | |
14 ~; | |
15 $usage .= "\n"; | |
16 | |
17 my ($input,$output,$kmin,$kmax,$directory); | |
18 | |
19 | |
20 GetOptions( | |
21 "input=s" => \$input, | |
22 "output=s" => \$output, | |
23 "kmin=s" => \$kmin, | |
24 "maxK=s" => \$kmax, | |
25 "directory=s" => \$directory, | |
26 ); | |
27 | |
28 | |
29 die $usage | |
30 if ( !$input || !$output || !$kmin || !$kmax || !$directory ); | |
31 | |
32 if ($kmin =~/^(\d+)\s*$/){ | |
33 $kmin = $1; | |
34 } | |
35 else{ | |
36 die "Error: kmin must be an integer\n"; | |
37 } | |
38 if ($kmax =~/^(\d+)\s*$/){ | |
39 $kmax = $1; | |
40 } | |
41 else{ | |
42 die "Error: kmax must be an integer\n"; | |
43 } | |
44 | |
45 | |
46 | |
47 open(my $P2,"$input.fam"); | |
48 my $n = 0; | |
49 my $ind_num = 0; | |
50 my @individus; | |
51 while(<$P2>) | |
52 { | |
53 my ($ind,$other) = split(/ /,$_); | |
54 push(@individus,$ind); | |
55 } | |
56 close($P2); | |
57 | |
58 my $basename = basename($input); | |
59 | |
60 ################################### | |
61 # launch admixture for different K | |
62 ################################### | |
63 my %errors; | |
64 for (my $k = $kmin; $k <= $kmax; $k++) | |
65 { | |
66 system("admixture --cv $input.bed $k >>$directory/log.$k 2>&1"); | |
67 my $cv_error_line = `grep -h CV $directory/log.$k`; | |
68 if ($cv_error_line =~/: (\d+\.*\d*)$/) | |
69 { | |
70 $errors{$1} = $k; | |
71 } | |
72 system("cat $directory/log.$k >>$directory/logs"); | |
73 system("echo '\n\n====================================\n\n' >>$directory/logs"); | |
74 | |
75 open(my $O2,">$basename.$k.final.Q"); | |
76 open(my $O,"$basename.$k.Q"); | |
77 my %hash_groupes; | |
78 my %hash_indv; | |
79 my %group_of_ind; | |
80 my $i = 0; | |
81 while (<$O>){ | |
82 $i++; | |
83 my $line = $_; | |
84 $line =~s/\n//g; | |
85 $line =~s/\r//g; | |
86 my @infos = split(/\s+/,$line); | |
87 my $group = "admix"; | |
88 my $ind = $individus[$i]; | |
89 for (my $j = 0; $j <$k; $j++){ | |
90 my $val = $infos[$j]; | |
91 if ($val > 0.5){$group = "Q$j";} | |
92 } | |
93 if ($ind){ | |
94 $hash_indv{$ind} = join(" ",@infos); | |
95 $hash_groupes{$group}{"ind"} .= ",".$ind; | |
96 $group_of_ind{$ind} = $group; | |
97 } | |
98 } | |
99 close($O); | |
100 | |
101 foreach my $group(sort keys(%hash_groupes)){ | |
102 my @inds = split(",",$hash_groupes{$group}{"ind"}); | |
103 foreach my $ind(@inds){ | |
104 if ($ind =~/\w+/){ | |
105 #print $O3 "$ind;$group\n"; | |
106 print $O2 $ind." ".$hash_indv{$ind}. "\n"; | |
107 } | |
108 } | |
109 } | |
110 #close($O3); | |
111 close($O2); | |
112 | |
113 system("cat $basename.$k.final.Q >>$directory/outputs.Q"); | |
114 system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | |
115 system("cat $basename.$k.P >>$directory/outputs.P"); | |
116 system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); | |
117 } | |
118 | |
119 my @sorted_errors = sort {$a<=>$b} keys(%errors); | |
120 my $best_K = $errors{@sorted_errors[0]}; | |
121 | |
122 | |
123 open(BEST1,"$basename.$best_K.final.Q"); | |
124 open(BEST2,">$directory/output"); | |
125 print BEST2 "<Covariate>\n"; | |
126 print BEST2 "<Trait>"; | |
127 for (my $j=1;$j<=$best_K;$j++) | |
128 { | |
129 print BEST2 " Q" . $j; | |
130 } | |
131 print BEST2 "\n"; | |
132 my $i = 0; | |
133 while(<BEST1>) | |
134 { | |
135 my $line = $_; | |
136 $line =~s/ /\t/g; | |
137 print BEST2 $line; | |
138 $i++; | |
139 } | |
140 close(BEST1); | |
141 close(BEST2); | |
142 | |
143 system("cp -rf $directory/log.$best_K $directory/log"); | |
144 | |
145 | |
146 | |
147 |