annotate admixture/.svn/text-base/Admixture.pl.svn-base @ 20:13cff72ec2d3 draft

Uploaded
author dereeper
date Mon, 23 Mar 2015 05:30:36 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
20
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
2
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
3 use strict;
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
4 use Switch;
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
7
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
# Usage message shown whenever the command line is incomplete or cannot be parsed.
my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input <input HAPMAP>
-o, --output <output>
-k, --kmin <K min. int>
-m, --maxK <K max. int>
-d, --directory <temporary directory>
-p, --path <path to executables>
~;
$usage .= "\n";

# NOTE(review): $output is mandatory below but no later statement visible in
# this script reads it -- confirm whether a wrapper relies on it.
my ($input,$output,$kmin,$kmax,$directory,$path);


# A false return value means Getopt::Long rejected the command line (unknown
# option, missing value); stop with the usage text instead of carrying on with
# a partially filled set of variables.
GetOptions(
    "input=s"     => \$input,
    "output=s"    => \$output,
    "kmin=s"      => \$kmin,
    "maxK=s"      => \$kmax,
    "directory=s" => \$directory,
    "path=s"      => \$path
) or die $usage;


# Every option is mandatory. The truthiness test also rejects a K of 0.
die $usage
    if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path);

# Both K bounds must be plain non-negative integers; trailing whitespace is
# tolerated and stripped by re-assigning from the capture.
if ($kmin =~/^(\d+)\s*$/){
    $kmin = $1;
}
else{
    die "Error: kmin must be an integer\n";
}
if ($kmax =~/^(\d+)\s*$/){
    $kmax = $1;
}
else{
    die "Error: kmax must be an integer\n";
}

# An inverted range would make the K loop run zero times and leave the best K
# undefined further down, so refuse it up front.
die "Error: kmin ($kmin) must not be greater than kmax ($kmax)\n"
    if $kmin > $kmax;
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
47
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
48
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
######################
# create map file
######################
# Writes $directory/input.map with one tab-separated row per data line of the
# input: field 3, field 1, a literal 0, field 4 (fields counted from 1).
# Presumably these are chromosome, marker id and position of a HapMap table --
# TODO confirm against the input specification.
open(my $M, '>', "$directory/input.map")
    or die "Error: cannot write $directory/input.map: $!\n";
open(my $H, '<', $input)
    or die "Error: cannot read $input: $!\n";
scalar <$H>;    # discard the first (header) line
while (my $marker_line = <$H>)
{
    # A blank line would otherwise become a map row made only of separators.
    next unless $marker_line =~ /\S/;
    my @infos = split(/\t/,$marker_line);
    print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n";
}
close($H);
# Buffered write errors only surface when the handle is closed.
close($M)
    or die "Error: cannot finish writing $directory/input.map: $!\n";
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
62
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
######################
# create ped file
######################
# Step 1: run the external transpose.awk on the input and capture its output
# in a scratch file (presumably one row per original column -- TODO confirm).
system("$path/transpose.awk $input >$directory/input.ped.2") == 0
    or die "Error: $path/transpose.awk failed on $input (status $?)\n";

# Step 2: convert the scratch file into $directory/input.ped.
open(my $P, '>', "$directory/input.ped")
    or die "Error: cannot write $directory/input.ped: $!\n";
open(my $P2, '<', "$directory/input.ped.2")
    or die "Error: cannot read $directory/input.ped.2: $!\n";
my $n = 0;          # rows read from the scratch file
my $ind_num = 0;    # running number given to each individual written
my @individus;      # individual names in output order, reused for the final report
while (my $row = <$P2>)
{
    $n++;
    # The first 11 rows are ignored (presumably the non-genotype HapMap
    # columns -- TODO confirm).
    next if $n <= 11;

    # First token is the individual's name, the remainder its genotypes.
    next unless $row =~ /^([^\s]+)\s+(.*)$/;
    my $ind = $1;
    my $genoyping_line = $2;

    $ind_num++;
    push(@individus,$ind);
    print $P "$ind $ind_num 0 0 1 2";

    my @genotypes = split(/\s/,$genoyping_line);
    foreach my $genotype(@genotypes)
    {
        # N is rewritten to 0, then every character of the genotype is
        # emitted as its own space-separated allele.
        $genotype =~s/N/0/g;
        my @alleles = split("",$genotype);
        print $P " " . join(" ",@alleles);
    }

    print $P "\n";
}
close($P2);
close($P)
    or die "Error: cannot finish writing $directory/input.ped: $!\n";

unlink("$directory/input.ped.2");

# Step 3: let plink build the binary fileset $directory/out.* from input.ped/input.map.
system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1") == 0
    or die "Error: plink failed (status $?), see $directory/plink.log\n";
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
105
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
106
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
###################################
# launch admixture for different K
###################################
# Maps each cross-validation error parsed from a log to the K that produced it.
my %errors;
for (my $k = $kmin; $k <= $kmax; $k++)
{
    # Console output of each run is appended to its own per-K log. A failing
    # run is reported but does not stop the remaining K values.
    my $status = system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1");
    warn "Warning: admixture exited with status $status for K=$k, see $directory/log.$k\n"
        if $status != 0;

    # Without /m the pattern can only match on the last line grep returns, so
    # when the log holds several CV lines the most recently appended one wins.
    my $cv_error_line = `grep -h CV $directory/log.$k`;
    if ($cv_error_line =~/: (\d+\.*\d*)$/)
    {
        $errors{$1} = $k;
    }
    else
    {
        warn "Warning: no cross-validation error found in $directory/log.$k\n";
    }

    # Accumulate the log and the out.$k.Q / out.$k.P files (read from the
    # current working directory) into combined files, separated by a rule.
    system("cat $directory/log.$k >>$directory/logs");
    system("echo '\n\n====================================\n\n' >>$directory/logs");
    system("cat out.$k.Q >>$directory/outputs.Q");
    system("echo '\n\n====================================\n\n' >>$directory/outputs.Q");
    system("cat out.$k.P >>$directory/outputs.P");
    system("echo '\n\n====================================\n\n' >>$directory/outputs.P");
}
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
126
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
# %errors maps cross-validation error => K; the smallest error selects the best K.
my @sorted_errors = sort {$a<=>$b} keys(%errors);
# Without any parsed CV error there is no best K and no out.<K>.Q to report.
die "Error: no cross-validation error could be read from the admixture logs in $directory\n"
    unless @sorted_errors;
my $best_K = $errors{$sorted_errors[0]};


#system("cp -rf out.$best_K.Q $directory/output");

# Rewrite out.<best K>.Q (read from the current working directory) as
# $directory/output: two header lines, then one row per line of the Q file,
# prefixed with the matching individual name.
open(my $BEST1, '<', "out.$best_K.Q")
    or die "Error: cannot read out.$best_K.Q: $!\n";
open(my $BEST2, '>', "$directory/output")
    or die "Error: cannot write $directory/output: $!\n";
print $BEST2 "<Covariate>\n";
print $BEST2 "<Trait>";
for my $j (1 .. $best_K)
{
    print $BEST2 " Q" . $j;
}
print $BEST2 "\n";
my $i = 0;    # index into @individus, advanced once per Q row
while (my $q_row = <$BEST1>)
{
    $q_row =~s/ /\t/g;    # space-separated values become tab-separated
    my $ind = $individus[$i];
    print $BEST2 "$ind ";
    print $BEST2 $q_row;
    $i++;
}
close($BEST1);
close($BEST2)
    or die "Error: cannot finish writing $directory/output: $!\n";

# Keep the log of the selected run under a fixed name.
system("cp -rf $directory/log.$best_K $directory/log");
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
156
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
157
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
158
13cff72ec2d3 Uploaded
dereeper
parents:
diff changeset
159