| 20 | 1 #!/usr/bin/perl | 
|  | 2 | 
|  | 3 use strict; | 
|  | 4 use Switch; | 
|  | 5 use Getopt::Long; | 
|  | 6 use Bio::SeqIO; | 
|  | 7 | 
|  | 8 my $usage = qq~Usage:$0 <args> [<opts>] | 
|  | 9 where <args> are: | 
|  | 10     -i, --input         <input HAPMAP> | 
|  | 11     -o, --output        <output> | 
|  | 12     -k, --kmin          <K min. int> | 
|  | 13     -m, --maxK          <K max. int> | 
|  | 14     -d, --directory     <temporary directory> | 
|  | 15     -p, --path          <path to executables> | 
|  | 16 ~; | 
|  | 17 $usage .= "\n"; | 
|  | 18 | 
|  | 19 my ($input,$output,$kmin,$kmax,$directory,$path); | 
|  | 20 | 
|  | 21 | 
|  | 22 GetOptions( | 
|  | 23 	"input=s"      => \$input, | 
|  | 24 	"output=s"     => \$output, | 
|  | 25 	"kmin=s"       => \$kmin, | 
|  | 26 	"maxK=s"       => \$kmax, | 
|  | 27 	"directory=s"  => \$directory, | 
|  | 28 	"path=s"       => \$path | 
|  | 29 ); | 
|  | 30 | 
|  | 31 | 
|  | 32 die $usage | 
|  | 33   if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); | 
|  | 34 | 
|  | 35 if ($kmin =~/^(\d+)\s*$/){ | 
|  | 36         $kmin = $1; | 
|  | 37 } | 
|  | 38 else{ | 
|  | 39         die "Error: kmin must be an integer\n"; | 
|  | 40 } | 
|  | 41 if ($kmax =~/^(\d+)\s*$/){ | 
|  | 42         $kmax = $1; | 
|  | 43 } | 
|  | 44 else{ | 
|  | 45         die "Error: kmax must be an integer\n"; | 
|  | 46 } | 
|  | 47 | 
|  | 48 | 
|  | 49 ###################### | 
|  | 50 # create map file | 
|  | 51 ###################### | 
|  | 52 open(my $M,">$directory/input.map"); | 
|  | 53 open(my $H,$input); | 
|  | 54 <$H>; | 
|  | 55 while(<$H>) | 
|  | 56 { | 
|  | 57 	my @infos = split(/\t/,$_); | 
|  | 58 	print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; | 
|  | 59 } | 
|  | 60 close($H); | 
|  | 61 close($M); | 
|  | 62 | 
|  | 63 ###################### | 
|  | 64 # create ped file | 
|  | 65 ###################### | 
|  | 66 system("$path/transpose.awk $input >$directory/input.ped.2"); | 
|  | 67 | 
|  | 68 open(my $P,">$directory/input.ped"); | 
|  | 69 open(my $P2,"$directory/input.ped.2"); | 
|  | 70 my $n = 0; | 
|  | 71 my $ind_num = 0; | 
|  | 72 my @individus; | 
|  | 73 while(<$P2>) | 
|  | 74 { | 
|  | 75 	$n++; | 
|  | 76 	if ($n > 11) | 
|  | 77 	{ | 
|  | 78 		my $line = $_; | 
|  | 79 		$line =~s/N/0/g; | 
|  | 80 		if (/^([^\s]+)\s+(.*)$/) | 
|  | 81 		{ | 
|  | 82 			$ind_num++; | 
|  | 83 			my $ind = $1; | 
|  | 84 			push(@individus,$ind); | 
|  | 85 			my $genoyping_line = $2; | 
|  | 86 			print $P "$ind	$ind_num	0	0	1	2"; | 
|  | 87 			my @genotypes = split(/\s/,$genoyping_line); | 
|  | 88 			foreach my $genotype(@genotypes) | 
|  | 89 			{ | 
|  | 90 				$genotype =~s/N/0/g; | 
|  | 91 				my @alleles = split("",$genotype); | 
|  | 92 				print $P "	" . join(" ",@alleles); | 
|  | 93 			} | 
|  | 94 | 
|  | 95 			print $P "\n"; | 
|  | 96 		} | 
|  | 97 	} | 
|  | 98 } | 
|  | 99 close($P2); | 
|  | 100 close($P); | 
|  | 101 | 
|  | 102 unlink("$directory/input.ped.2"); | 
|  | 103 | 
|  | 104 system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); | 
|  | 105 | 
|  | 106 | 
|  | 107 ################################### | 
|  | 108 # launch admixture for different K | 
|  | 109 ################################### | 
|  | 110 my %errors; | 
|  | 111 for (my $k = $kmin; $k <= $kmax; $k++) | 
|  | 112 { | 
|  | 113 	system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); | 
|  | 114 	my $cv_error_line = `grep -h CV $directory/log.$k`; | 
|  | 115 	if ($cv_error_line =~/: (\d+\.*\d*)$/) | 
|  | 116 	{ | 
|  | 117 		$errors{$1} = $k; | 
|  | 118 	} | 
|  | 119 	system("cat $directory/log.$k >>$directory/logs"); | 
|  | 120 	system("echo '\n\n====================================\n\n' >>$directory/logs"); | 
|  | 121 	system("cat out.$k.Q >>$directory/outputs.Q"); | 
|  | 122 	system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | 
|  | 123 	system("cat out.$k.P >>$directory/outputs.P"); | 
|  | 124 	system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); | 
|  | 125 } | 
|  | 126 | 
|  | 127 my @sorted_errors = sort {$a<=>$b} keys(%errors); | 
|  | 128 my $best_K = $errors{@sorted_errors[0]}; | 
|  | 129 | 
|  | 130 | 
|  | 131 #system("cp -rf out.$best_K.Q $directory/output"); | 
|  | 132 | 
|  | 133 open(BEST1,"out.$best_K.Q"); | 
|  | 134 open(BEST2,">$directory/output"); | 
|  | 135 print BEST2 "<Covariate>\n"; | 
|  | 136 print BEST2 "<Trait>"; | 
|  | 137 for (my $j=1;$j<=$best_K;$j++) | 
|  | 138 { | 
|  | 139 	print BEST2 "	Q" . $j; | 
|  | 140 } | 
|  | 141 print BEST2 "\n"; | 
|  | 142 my $i = 0; | 
|  | 143 while(<BEST1>) | 
|  | 144 { | 
|  | 145 	my $line = $_; | 
|  | 146 	$line =~s/ /\t/g; | 
|  | 147 	my $ind = $individus[$i]; | 
|  | 148 	print BEST2 "$ind	"; | 
|  | 149 	print BEST2 $line; | 
|  | 150 	$i++; | 
|  | 151 } | 
|  | 152 close(BEST1); | 
|  | 153 close(BEST2); | 
|  | 154 | 
|  | 155 system("cp -rf $directory/log.$best_K $directory/log"); | 
|  | 156 | 
|  | 157 | 
|  | 158 | 
|  | 159 |