Mercurial > repos > dereeper > admixture
view Admixture.pl @ 0:781ac6e7a3a1 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 20 Feb 2015 10:09:18 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Switch; use Getopt::Long; use Bio::SeqIO; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: -i, --input <input HAPMAP> -o, --output <output> -k, --kmin <K min. int> -m, --maxK <K max. int> -d, --directory <temporary directory> -p, --path <path to executables> ~; $usage .= "\n"; my ($input,$output,$kmin,$kmax,$directory,$path); GetOptions( "input=s" => \$input, "output=s" => \$output, "kmin=s" => \$kmin, "maxK=s" => \$kmax, "directory=s" => \$directory, "path=s" => \$path ); die $usage if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); if ($kmin =~/^(\d+)\s*$/){ $kmin = $1; } else{ die "Error: kmin must be an integer\n"; } if ($kmax =~/^(\d+)\s*$/){ $kmax = $1; } else{ die "Error: kmax must be an integer\n"; } ###################### # create map file ###################### open(my $M,">$directory/input.map"); open(my $H,$input); <$H>; while(<$H>) { my @infos = split(/\t/,$_); print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; } close($H); close($M); ###################### # create ped file ###################### system("$path/transpose.awk $input >$directory/input.ped.2"); open(my $P,">$directory/input.ped"); open(my $P2,"$directory/input.ped.2"); my $n = 0; my $ind_num = 0; my @individus; while(<$P2>) { $n++; if ($n > 11) { my $line = $_; $line =~s/N/0/g; if (/^([^\s]+)\s+(.*)$/) { $ind_num++; my $ind = $1; push(@individus,$ind); my $genoyping_line = $2; print $P "$ind $ind_num 0 0 1 2"; my @genotypes = split(/\s/,$genoyping_line); foreach my $genotype(@genotypes) { $genotype =~s/N/0/g; my @alleles = split("",$genotype); print $P " " . join(" ",@alleles); } print $P "\n"; } } } close($P2); close($P); unlink("$directory/input.ped.2"); system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); ################################### # launch admixture for different K ################################### my %errors; for (my $k = $kmin; $k <= $kmax; $k++) { system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); my $cv_error_line = `grep -h CV $directory/log.$k`; if ($cv_error_line =~/: (\d+\.*\d*)$/) { $errors{$1} = $k; } system("cat $directory/log.$k >>$directory/logs"); system("echo '\n\n====================================\n\n' >>$directory/logs"); system("cat out.$k.Q >>$directory/outputs.Q"); system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); system("cat out.$k.P >>$directory/outputs.P"); system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); } my @sorted_errors = sort {$a<=>$b} keys(%errors); my $best_K = $errors{@sorted_errors[0]}; #system("cp -rf out.$best_K.Q $directory/output"); open(BEST1,"out.$best_K.Q"); open(BEST2,">$directory/output"); print BEST2 "<Covariate>\n"; print BEST2 "<Trait>"; for (my $j=1;$j<=$best_K;$j++) { print BEST2 " Q" . $j; } print BEST2 "\n"; my $i = 0; while(<BEST1>) { my $line = $_; $line =~s/ /\t/g; my $ind = $individus[$i]; print BEST2 "$ind "; print BEST2 $line; $i++; } close(BEST1); close(BEST2); system("cp -rf $directory/log.$best_K $directory/log");