annotate Admixture.pl @ 0:781ac6e7a3a1 draft

Uploaded
author dereeper
date Fri, 20 Feb 2015 10:09:18 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
2
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
3 use strict;
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
4 use Switch;
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
7
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
# Help text shown when a mandatory command-line argument is missing.
# Built line by line; the two trailing empty elements yield the blank
# line that terminates the message.
my $usage = join(
    "\n",
    "Usage:$0 <args> [<opts>]",
    'where <args> are:',
    '-i, --input <input HAPMAP>',
    '-o, --output <output>',
    '-k, --kmin <K min. int>',
    '-m, --maxK <K max. int>',
    '-d, --directory <temporary directory>',
    '-p, --path <path to executables>',
    '',
    '',
);
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
18
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
# Command-line arguments (all mandatory):
#   $input      HapMap genotype file to convert
#   $output     output name
#               NOTE(review): required here but never read by the visible
#               part of the script - confirm whether a caller relies on it
#   $kmin,$kmax inclusive range of K values handed to admixture
#   $directory  working directory receiving every generated file
#   $path       directory that contains transpose.awk
my ($input,$output,$kmin,$kmax,$directory,$path);

# GetOptions returns false on unknown or malformed options; stop with the
# usage text instead of carrying on with a partially parsed command line.
GetOptions(
    "input=s"     => \$input,
    "output=s"    => \$output,
    "kmin=s"      => \$kmin,
    "maxK=s"      => \$kmax,
    "directory=s" => \$directory,
    "path=s"      => \$path
) or die $usage;

# Every argument must be present. Presence is tested with defined/length
# so that a legitimate value such as "0" is not mistaken for "missing";
# the numeric range of K is checked separately below.
for my $required ($input, $output, $kmin, $kmax, $directory, $path) {
    die $usage unless defined $required && length $required;
}

# Both K bounds must be non-negative integers (trailing whitespace is
# tolerated). "+ 0" normalises values such as "007" to a plain number.
for my $bound ( [ 'kmin', \$kmin ], [ 'kmax', \$kmax ] ) {
    my ($name, $ref) = @$bound;
    if ($$ref =~ /^(\d+)\s*$/) {
        $$ref = $1 + 0;
    }
    else {
        die "Error: $name must be an integer\n";
    }
}

# K = 0 is not a usable number of populations, and an inverted range would
# silently run no admixture job at all.
die "Error: kmin must be at least 1\n" if $kmin < 1;
die "Error: kmax must not be smaller than kmin\n" if $kmax < $kmin;
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
47
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
48
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
######################
# create map file
######################
# Writes $directory/input.map with one row per data row of the input file:
# input columns 3, 1 and 4 (0-based 2, 0, 3) separated by tabs, with a
# constant "0" inserted as third output column.
# Assumes the HapMap column order rs#, alleles, chrom, pos, so the output is
# chromosome / marker id / 0 / position - TODO confirm against real input.
open(my $M, '>', "$directory/input.map")
    or die "Error: cannot write $directory/input.map: $!\n";
# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode.
open(my $H, '<', $input)
    or die "Error: cannot read $input: $!\n";

scalar(<$H>);    # discard the header line

while (my $row = <$H>)
{
    # Strip the line ending so a row that has exactly four columns does not
    # carry its newline into the last output field.
    $row =~ s/\r?\n\z//;

    # A blank line would otherwise produce an empty marker entry.
    next unless $row =~ /\S/;

    my @infos = split(/\t/, $row);
    print $M join("\t", $infos[2], $infos[0], "0", $infos[3]) . "\n";
}
close($H);
# Buffered write errors only surface when the handle is closed.
close($M)
    or die "Error: cannot finish writing $directory/input.map: $!\n";
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
62
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
######################
# create ped file
######################
# Runs $path/transpose.awk on the input file and turns its output into
# $directory/input.ped, one line per individual:
#   <name> <running number> 0 0 1 2 <allele> <allele> ...
# Every genotype token is split into single characters and "N" is written
# as "0". The individual names are collected in @individus, in file order.
# Presumably the six leading fields are the PED family/individual/father/
# mother/sex/phenotype columns - TODO confirm.
my $n = 0;          # lines read from the transposed matrix
my $ind_num = 0;    # running number of the individuals written
my @individus;      # individual names, in output order

open(my $P, '>', "$directory/input.ped")
    or die "Error: cannot write $directory/input.ped: $!\n";

# List-form pipe open: the script and the input path are passed as separate
# arguments, so no shell is involved (paths with spaces or shell
# metacharacters are safe) and no intermediate file has to be written and
# removed afterwards.
open(my $P2, '-|', "$path/transpose.awk", $input)
    or die "Error: cannot run $path/transpose.awk: $!\n";

while (my $row = <$P2>)
{
    $n++;

    # The first 11 transposed lines are not individuals; presumably they are
    # the fixed HapMap annotation columns - TODO confirm.
    next if $n <= 11;

    # First token is the individual name, the rest are its genotypes.
    next unless $row =~ /^(\S+)\s+(.*)$/;
    my ($ind, $genotyping_line) = ($1, $2);

    $ind_num++;
    push(@individus, $ind);

    my @fields = ($ind, $ind_num, 0, 0, 1, 2);

    # split ' ' skips runs of whitespace, so repeated separators do not
    # create empty genotype tokens.
    foreach my $genotype (split(' ', $genotyping_line))
    {
        $genotype =~ s/N/0/g;
        push(@fields, split(//, $genotype));
    }

    print $P join(" ", @fields) . "\n";
}

# For a pipe, close reports a non-zero exit status of the child process.
close($P2)
    or die "Error: $path/transpose.awk failed: "
         . ($! ? $! : "exit status " . ($? >> 8)) . "\n";
close($P)
    or die "Error: cannot finish writing $directory/input.ped: $!\n";
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
103
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
# Convert $directory/input.{ped,map} into the binary fileset $directory/out.*
# with plink; its stdout and stderr are appended to $directory/plink.log.
# The shell form is kept because of the redirection. The exit status is
# checked so that a failed conversion stops the run here instead of
# surfacing later as a missing out.bed.
my $plink_cmd = "plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1";
system($plink_cmd) == 0
    or die "Error: plink failed (status $?), see $directory/plink.log\n";
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
105
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
106
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
###################################
# launch admixture for different K
###################################
# For every K in [$kmin, $kmax]:
#   - run "admixture --cv" on $directory/out.bed, appending its output to
#     $directory/log.K
#   - record the cross-validation error found in that log in %errors
#     (key: error value, value: K; on equal errors the larger K wins)
#   - append log.K, out.K.Q and out.K.P to the cumulative files logs,
#     outputs.Q and outputs.P in $directory, each followed by a separator.
# out.K.Q and out.K.P are read from the current working directory.
my %errors;

# Appends the content of $src (if readable) and a separator block to $dst.
# A missing source is reported but the separator is still written, so the
# cumulative files keep one section per K.
my $append_with_separator = sub {
    my ($src, $dst) = @_;
    open(my $out, '>>', $dst)
        or die "Error: cannot append to $dst: $!\n";
    if (open(my $in, '<', $src))
    {
        while (my $chunk = <$in>)
        {
            print $out $chunk;
        }
        close($in);
    }
    else
    {
        warn "Warning: cannot read $src: $!\n";
    }
    print $out "\n\n====================================\n\n\n";
    close($out)
        or die "Error: cannot finish writing $dst: $!\n";
};

for my $k ($kmin .. $kmax)
{
    my $log = "$directory/log.$k";

    # Shell form kept for the redirection. A failure for one K is reported
    # but does not abort the remaining runs.
    my $status = system("admixture --cv $directory/out.bed $k >>$log 2>&1");
    if ($status != 0)
    {
        warn "Warning: admixture returned status $status for K=$k, see $log\n";
    }

    # The log is opened in append mode and may hold several runs; the last
    # line mentioning "CV" is the one that counts.
    my $cv_error_line = '';
    if (open(my $log_fh, '<', $log))
    {
        while (my $log_line = <$log_fh>)
        {
            $cv_error_line = $log_line if $log_line =~ /CV/;
        }
        close($log_fh);
    }
    if ($cv_error_line =~ /: (\d+\.*\d*)$/)
    {
        $errors{$1} = $k;
    }

    $append_with_separator->($log,       "$directory/logs");
    $append_with_separator->("out.$k.Q", "$directory/outputs.Q");
    $append_with_separator->("out.$k.P", "$directory/outputs.P");
}
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
126
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
# Select the K with the smallest cross-validation error. %errors maps
# error value => K, so the numerically smallest key identifies the best run.
my @sorted_errors = sort {$a<=>$b} keys(%errors);
die "Error: no cross-validation error could be read from the admixture logs in $directory\n"
    unless @sorted_errors;
my $best_K = $errors{$sorted_errors[0]};

# Write $directory/output: a "<Covariate>" line, a "<Trait>" header naming
# Q1..Q<best K>, then every line of out.<best K>.Q prefixed with the matching
# individual name. The Q file is not copied verbatim because the header and
# the names have to be added.
open(my $best_in, '<', "out.$best_K.Q")
    or die "Error: cannot read out.$best_K.Q: $!\n";
open(my $best_out, '>', "$directory/output")
    or die "Error: cannot write $directory/output: $!\n";

print $best_out "<Covariate>\n";
# NOTE(review): the separators in " Q" and "$ind " are single spaces here,
# while the Q values below are converted to tab-separated - confirm whether
# these were meant to be tabs as well.
print $best_out "<Trait>" . join("", map { " Q" . $_ } 1 .. $best_K) . "\n";

my $i = 0;
while (my $line = <$best_in>)
{
    $line =~ s/ /\t/g;
    # Rows of the Q file are in the order in which the individuals were
    # written to the ped file, hence the positional lookup.
    my $ind = $individus[$i];
    print $best_out "$ind " . $line;
    $i++;
}
close($best_in);
close($best_out)
    or die "Error: cannot finish writing $directory/output: $!\n";

# Keep the log of the selected run under a fixed name. List form: no shell
# is involved in building the command.
system("cp", "-rf", "$directory/log.$best_K", "$directory/log") == 0
    or warn "Warning: cannot copy $directory/log.$best_K to $directory/log (status $?)\n";
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
156
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
157
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
158
781ac6e7a3a1 Uploaded
dereeper
parents:
diff changeset
159