Mercurial > repos > dereeper > sniplay3
comparison admixture/.svn/text-base/Admixture.pl.svn-base @ 20:13cff72ec2d3 draft
Uploaded
author | dereeper |
---|---|
date | Mon, 23 Mar 2015 05:30:36 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
19:a0a95688cf17 | 20:13cff72ec2d3 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 my $usage = qq~Usage:$0 <args> [<opts>] | |
9 where <args> are: | |
10 -i, --input <input HAPMAP> | |
11 -o, --output <output> | |
12 -k, --kmin <K min. int> | |
13 -m, --maxK <K max. int> | |
14 -d, --directory <temporary directory> | |
15 -p, --path <path to executables> | |
16 ~; | |
17 $usage .= "\n"; | |
18 | |
19 my ($input,$output,$kmin,$kmax,$directory,$path); | |
20 | |
21 | |
22 GetOptions( | |
23 "input=s" => \$input, | |
24 "output=s" => \$output, | |
25 "kmin=s" => \$kmin, | |
26 "maxK=s" => \$kmax, | |
27 "directory=s" => \$directory, | |
28 "path=s" => \$path | |
29 ); | |
30 | |
31 | |
32 die $usage | |
33 if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); | |
34 | |
35 if ($kmin =~/^(\d+)\s*$/){ | |
36 $kmin = $1; | |
37 } | |
38 else{ | |
39 die "Error: kmin must be an integer\n"; | |
40 } | |
41 if ($kmax =~/^(\d+)\s*$/){ | |
42 $kmax = $1; | |
43 } | |
44 else{ | |
45 die "Error: kmax must be an integer\n"; | |
46 } | |
47 | |
48 | |
49 ###################### | |
50 # create map file | |
51 ###################### | |
52 open(my $M,">$directory/input.map"); | |
53 open(my $H,$input); | |
54 <$H>; | |
55 while(<$H>) | |
56 { | |
57 my @infos = split(/\t/,$_); | |
58 print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; | |
59 } | |
60 close($H); | |
61 close($M); | |
62 | |
63 ###################### | |
64 # create ped file | |
65 ###################### | |
66 system("$path/transpose.awk $input >$directory/input.ped.2"); | |
67 | |
68 open(my $P,">$directory/input.ped"); | |
69 open(my $P2,"$directory/input.ped.2"); | |
70 my $n = 0; | |
71 my $ind_num = 0; | |
72 my @individus; | |
73 while(<$P2>) | |
74 { | |
75 $n++; | |
76 if ($n > 11) | |
77 { | |
78 my $line = $_; | |
79 $line =~s/N/0/g; | |
80 if (/^([^\s]+)\s+(.*)$/) | |
81 { | |
82 $ind_num++; | |
83 my $ind = $1; | |
84 push(@individus,$ind); | |
85 my $genoyping_line = $2; | |
86 print $P "$ind $ind_num 0 0 1 2"; | |
87 my @genotypes = split(/\s/,$genoyping_line); | |
88 foreach my $genotype(@genotypes) | |
89 { | |
90 $genotype =~s/N/0/g; | |
91 my @alleles = split("",$genotype); | |
92 print $P " " . join(" ",@alleles); | |
93 } | |
94 | |
95 print $P "\n"; | |
96 } | |
97 } | |
98 } | |
99 close($P2); | |
100 close($P); | |
101 | |
102 unlink("$directory/input.ped.2"); | |
103 | |
104 system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); | |
105 | |
106 | |
107 ################################### | |
108 # launch admixture for different K | |
109 ################################### | |
110 my %errors; | |
111 for (my $k = $kmin; $k <= $kmax; $k++) | |
112 { | |
113 system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); | |
114 my $cv_error_line = `grep -h CV $directory/log.$k`; | |
115 if ($cv_error_line =~/: (\d+\.*\d*)$/) | |
116 { | |
117 $errors{$1} = $k; | |
118 } | |
119 system("cat $directory/log.$k >>$directory/logs"); | |
120 system("echo '\n\n====================================\n\n' >>$directory/logs"); | |
121 system("cat out.$k.Q >>$directory/outputs.Q"); | |
122 system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | |
123 system("cat out.$k.P >>$directory/outputs.P"); | |
124 system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); | |
125 } | |
126 | |
127 my @sorted_errors = sort {$a<=>$b} keys(%errors); | |
128 my $best_K = $errors{@sorted_errors[0]}; | |
129 | |
130 | |
131 #system("cp -rf out.$best_K.Q $directory/output"); | |
132 | |
133 open(BEST1,"out.$best_K.Q"); | |
134 open(BEST2,">$directory/output"); | |
135 print BEST2 "<Covariate>\n"; | |
136 print BEST2 "<Trait>"; | |
137 for (my $j=1;$j<=$best_K;$j++) | |
138 { | |
139 print BEST2 " Q" . $j; | |
140 } | |
141 print BEST2 "\n"; | |
142 my $i = 0; | |
143 while(<BEST1>) | |
144 { | |
145 my $line = $_; | |
146 $line =~s/ /\t/g; | |
147 my $ind = $individus[$i]; | |
148 print BEST2 "$ind "; | |
149 print BEST2 $line; | |
150 $i++; | |
151 } | |
152 close(BEST1); | |
153 close(BEST2); | |
154 | |
155 system("cp -rf $directory/log.$best_K $directory/log"); | |
156 | |
157 | |
158 | |
159 |