| 4 | 1 #!/usr/bin/perl | 
|  | 2 | 
|  | 3 use strict; | 
| 5 | 4 use Switch; | 
| 4 | 5 use Getopt::Long; | 
| 5 | 6 use Bio::SeqIO; | 
| 4 | 7 use File::Basename; | 
|  | 8 | 
|  | 9 my $usage = qq~Usage:$0 <args> [<opts>] | 
|  | 10 where <args> are: | 
|  | 11     -i, --input         <input basename (for PED, FAM, BIM)> | 
|  | 12     -o, --output        <output> | 
|  | 13     -k, --kmin          <K min. int> | 
|  | 14     -m, --maxK          <K max. int> | 
|  | 15     -d, --directory     <temporary directory> | 
| 5 | 16     -t, --threshold     <threshold admixture proportion for group assignation> | 
| 4 | 17 ~; | 
|  | 18 $usage .= "\n"; | 
|  | 19 | 
| 5 | 20 my ($input,$output,$kmin,$kmax,$directory,$threshold); | 
| 4 | 21 | 
|  | 22 | 
|  | 23 GetOptions( | 
|  | 24 	"input=s"      => \$input, | 
|  | 25 	"output=s"     => \$output, | 
|  | 26 	"kmin=s"       => \$kmin, | 
|  | 27 	"maxK=s"       => \$kmax, | 
|  | 28 	"directory=s"  => \$directory, | 
| 5 | 29 	"threshold"    => \$threshold, | 
| 4 | 30 ); | 
|  | 31 | 
|  | 32 | 
|  | 33 die $usage | 
|  | 34   if ( !$input || !$output || !$kmin || !$kmax || !$directory ); | 
|  | 35 | 
|  | 36 if ($kmin =~/^(\d+)\s*$/){ | 
|  | 37         $kmin = $1; | 
|  | 38 } | 
|  | 39 else{ | 
|  | 40         die "Error: kmin must be an integer\n"; | 
|  | 41 } | 
|  | 42 if ($kmax =~/^(\d+)\s*$/){ | 
|  | 43         $kmax = $1; | 
|  | 44 } | 
|  | 45 else{ | 
|  | 46         die "Error: kmax must be an integer\n"; | 
|  | 47 } | 
|  | 48 | 
|  | 49 | 
|  | 50 | 
|  | 51 open(my $P2,"$input.fam"); | 
|  | 52 my $n = 0; | 
|  | 53 my $ind_num = 0; | 
|  | 54 my @individus; | 
|  | 55 while(<$P2>) | 
|  | 56 { | 
|  | 57 	my ($ind,$other) = split(/ /,$_); | 
|  | 58 	push(@individus,$ind); | 
|  | 59 } | 
|  | 60 close($P2); | 
|  | 61 | 
|  | 62 my $basename = basename($input); | 
|  | 63 | 
|  | 64 ################################### | 
|  | 65 # launch admixture for different K | 
|  | 66 ################################### | 
|  | 67 my %errors; | 
|  | 68 for (my $k = $kmin; $k <= $kmax; $k++) | 
|  | 69 { | 
| 6 | 70 	system("admixture --cv $input.bed $k >>$directory/log.$k 2>&1"); | 
| 4 | 71 	my $cv_error_line = `grep -h CV $directory/log.$k`; | 
|  | 72 	if ($cv_error_line =~/: (\d+\.*\d*)$/) | 
|  | 73 	{ | 
|  | 74 		$errors{$1} = $k; | 
|  | 75 	} | 
|  | 76 	system("cat $directory/log.$k >>$directory/logs"); | 
|  | 77 	system("echo '\n\n====================================\n\n' >>$directory/logs"); | 
|  | 78 | 
|  | 79 	open(my $O2,">$basename.$k.final.Q"); | 
| 5 | 80 	open(my $O3,">$directory/groups.$k"); | 
| 4 | 81 	open(my $O,"$basename.$k.Q"); | 
|  | 82 	my %hash_groupes; | 
|  | 83 	my %hash_indv; | 
|  | 84 	my %group_of_ind; | 
|  | 85 	my $i = 0; | 
|  | 86 	while (<$O>){ | 
|  | 87 		$i++; | 
|  | 88 		my $line = $_; | 
|  | 89 		$line =~s/\n//g; | 
|  | 90 		$line =~s/\r//g; | 
|  | 91 		my @infos = split(/\s+/,$line); | 
|  | 92 		my $group = "admix"; | 
|  | 93 		my $ind = $individus[$i]; | 
|  | 94 		for (my $j = 0; $j <$k; $j++){ | 
|  | 95 			my $val = $infos[$j]; | 
| 5 | 96 			if ($val > ($threshold/100)){$group = "Q$j";} | 
| 4 | 97 		} | 
|  | 98 		if ($ind){ | 
|  | 99 			$hash_indv{$ind} = join("	",@infos); | 
|  | 100 			$hash_groupes{$group}{"ind"} .= ",".$ind; | 
|  | 101 			$group_of_ind{$ind} = $group; | 
|  | 102 		} | 
|  | 103 	} | 
|  | 104 	close($O); | 
|  | 105 | 
|  | 106 	foreach my $group(sort keys(%hash_groupes)){ | 
|  | 107 		my @inds = split(",",$hash_groupes{$group}{"ind"}); | 
|  | 108 		foreach my $ind(@inds){ | 
|  | 109 			if ($ind =~/\w+/){ | 
| 5 | 110 				print $O3 "$ind;$group\n"; | 
| 4 | 111 				print $O2 $ind."	".$hash_indv{$ind}. "\n"; | 
|  | 112 			} | 
|  | 113 		} | 
|  | 114 	} | 
| 5 | 115 	close($O3); | 
| 4 | 116 	close($O2); | 
|  | 117 | 
|  | 118 	system("cat $basename.$k.final.Q >>$directory/outputs.Q"); | 
|  | 119 	system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | 
|  | 120 	system("cat $basename.$k.P >>$directory/outputs.P"); | 
|  | 121 	system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); | 
|  | 122 } | 
|  | 123 | 
|  | 124 my @sorted_errors = sort {$a<=>$b} keys(%errors); | 
|  | 125 my $best_K = $errors{@sorted_errors[0]}; | 
|  | 126 | 
|  | 127 | 
|  | 128 open(BEST1,"$basename.$best_K.final.Q"); | 
|  | 129 open(BEST2,">$directory/output"); | 
|  | 130 print BEST2 "<Covariate>\n"; | 
|  | 131 print BEST2 "<Trait>"; | 
|  | 132 for (my $j=1;$j<=$best_K;$j++) | 
|  | 133 { | 
|  | 134 	print BEST2 "	Q" . $j; | 
|  | 135 } | 
|  | 136 print BEST2 "\n"; | 
|  | 137 my $i = 0; | 
|  | 138 while(<BEST1>) | 
|  | 139 { | 
|  | 140 	my $line = $_; | 
|  | 141 	$line =~s/ /\t/g; | 
|  | 142 	print BEST2 $line; | 
|  | 143 	$i++; | 
|  | 144 } | 
|  | 145 close(BEST1); | 
|  | 146 close(BEST2); | 
|  | 147 | 
|  | 148 system("cp -rf $directory/log.$best_K $directory/log"); | 
| 5 | 149 system("cp -rf $directory/groups.$best_K $directory/groups"); | 
| 4 | 150 | 
|  | 151 | 
|  | 152 |