changeset 5:97c9c8daa3c3 draft

planemo upload
author dereeper
date Wed, 13 Apr 2016 07:51:13 -0400
parents 58df6910f1c3
children 3f66f32dc5d9
files Admixture.pl admixture.sh admixture.xml test-data/groups
diffstat 4 files changed, 186 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/Admixture.pl	Tue Apr 12 09:31:56 2016 -0400
+++ b/Admixture.pl	Wed Apr 13 07:51:13 2016 -0400
@@ -1,7 +1,9 @@
 #!/usr/bin/perl
 
 use strict;
+use Switch;
 use Getopt::Long;
+use Bio::SeqIO;
 use File::Basename;
 
 my $usage = qq~Usage:$0 <args> [<opts>]
@@ -11,10 +13,11 @@
     -k, --kmin          <K min. int>
     -m, --maxK          <K max. int>
     -d, --directory     <temporary directory>
+    -t, --threshold     <threshold admixture proportion for group assignation>
 ~;
 $usage .= "\n";
 
-my ($input,$output,$kmin,$kmax,$directory);
+my ($input,$output,$kmin,$kmax,$directory,$threshold);
 
 
 GetOptions(
@@ -23,6 +26,7 @@
 	"kmin=s"       => \$kmin,
 	"maxK=s"       => \$kmax,
 	"directory=s"  => \$directory,
+	"threshold=s"  => \$threshold,  # '=s': takes a value (percentage); without it -t is a bare flag and $threshold is just 1
 );
 
 
@@ -63,7 +67,7 @@
 my %errors;
 for (my $k = $kmin; $k <= $kmax; $k++)
 {
-	system("admixture --cv $input.bed $k >>$directory/log.$k 2>&1");
+	system("/apps/www/sniplay.cirad.fr/tools/admixture/admixture_linux-1.23/admixture --cv $input.bed $k >>$directory/log.$k 2>&1");
 	my $cv_error_line = `grep -h CV $directory/log.$k`;
 	if ($cv_error_line =~/: (\d+\.*\d*)$/)
 	{
@@ -73,6 +77,7 @@
 	system("echo '\n\n====================================\n\n' >>$directory/logs");
 
 	open(my $O2,">$basename.$k.final.Q");
+	open(my $O3,">$directory/groups.$k");
 	open(my $O,"$basename.$k.Q");
 	my %hash_groupes;
 	my %hash_indv;
@@ -88,7 +93,7 @@
 		my $ind = $individus[$i];
 		for (my $j = 0; $j <$k; $j++){
 			my $val = $infos[$j];
-			if ($val > 0.5){$group = "Q$j";}
+			if ($val > ((defined $threshold ? $threshold : 50)/100)){$group = "Q$j";} # threshold is a percentage; default 50 keeps the former 0.5 cutoff when -t is omitted
 		}
 		if ($ind){      
 			$hash_indv{$ind} = join("	",@infos);
@@ -102,12 +107,12 @@
 		my @inds = split(",",$hash_groupes{$group}{"ind"});
 		foreach my $ind(@inds){
 			if ($ind =~/\w+/){
-				#print $O3 "$ind;$group\n";
+				print $O3 "$ind;$group\n";
 				print $O2 $ind."	".$hash_indv{$ind}. "\n";
 			}
 		}
 	}
-	#close($O3);
+	close($O3);
 	close($O2);
 
 	system("cat $basename.$k.final.Q >>$directory/outputs.Q");
@@ -141,7 +146,7 @@
 close(BEST2);
 
 system("cp -rf $directory/log.$best_K $directory/log");
+system("cp -rf $directory/groups.$best_K $directory/groups");
 
 
 
-
--- a/admixture.sh	Tue Apr 12 09:31:56 2016 -0400
+++ b/admixture.sh	Wed Apr 13 07:51:13 2016 -0400
@@ -8,6 +8,8 @@
 best_k_logfile=$7
 kmin=$8
 kmax=$9
+groups=${10}
+threshold_group=${11}
 
 directory=`dirname $0`
 mkdir tmpdir$$
@@ -16,11 +18,12 @@
 cp -rf $bim tmpdir$$/input.bim
 
  
-perl $directory/Admixture.pl -i tmpdir$$/input -o $outputs -k $kmin -m $kmax -d tmpdir$$
+perl $directory/Admixture.pl -i tmpdir$$/input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -t $threshold_group
 
 mv tmpdir$$/output $best_k_output
 mv tmpdir$$/log $best_k_logfile
 mv tmpdir$$/outputs.Q $outputs
 mv tmpdir$$/logs $logs
+mv tmpdir$$/groups $groups
 
 
--- a/admixture.xml	Tue Apr 12 09:31:56 2016 -0400
+++ b/admixture.xml	Wed Apr 13 07:51:13 2016 -0400
@@ -3,7 +3,7 @@
 	<requirements>
 		<requirement type="package" version="1.23">admixture</requirement>
 	</requirements>
-	<command interpreter="bash">./admixture.sh $bed $fam $bim $outputs $logs $best_k_output $best_k_logfile $kmin $kmax
+	<command interpreter="bash">./admixture.sh $bed $fam $bim $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group
     </command>
 	<inputs>
 		<param format="txt" name="bed" type="data" label="Allelic file in BED format" help="Allelic file in BED format"/>
@@ -11,9 +11,11 @@
 		<param format="txt" name="bim" type="data" label="Bim file" help="Bim file"/>
 		<param type="text" name="kmin" label="K min" value="2"/>
 		<param type="text" name="kmax" label="K max" value="5"/>
+		<param type="text" name="threshold_group" label="Minimum admixture proportion percentage for group assignation" value="50"/>
 	</inputs>
 	<outputs>
 		<data format="txt" name="best_k_output" label="Best K Output"/>
+		<data format="txt" name="best_k_groups" label="Best K Groups"/>
 		<data format="txt" name="best_k_logfile" label="Best K Logfile"/>
 		<data format="txt" name="outputs" label="All Outputs"/>
 		<data format="txt" name="logs" label="All Logs"/>
@@ -25,9 +27,10 @@
 			<param name="bim" value="input.bim" />
 			<param name="fam" value="input.fam" />
 			<param name="kmax" value="3" />
-
+			<param name="threshold_group" value="60" />
 			<output name="best_k_output" file="output" />
 			<output name="outputs" file="outputs.Q" />
+			<output name="best_k_groups" file="groups" />
 		</test>
 
 	</tests>
@@ -53,7 +56,6 @@
 
 -----
 
-
 ===========
  Overview:
 ===========
@@ -67,11 +69,7 @@
 
 .. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html
 	</help>
-
 <citations>
 <citation type="doi" >10.1101/gr.094052.109</citation>
 </citations>
-
-
-
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/groups	Wed Apr 13 07:51:13 2016 -0400
@@ -0,0 +1,166 @@
+M202;Q0
+CICIHBETON;Q0
+DANGREY;Q0
+DAVAO;Q0
+DINORADO;Q0
+GOGO;Q0
+HD1-4;Q0
+JIMBRUKJOLOWORO;Q0
+KEDAYAN;Q0
+KINANDANGPATONG;Q0
+KUROKA;Q0
+MAINTIMOLOTSY1226;Q0
+NEPHOAVANG;Q0
+NPE826;Q0
+ORYZICASABANA6;Q0
+POENOETHITAM;Q0
+SPEAKER;Q0
+YUNLU7;Q0
+VIETNAM3;Q0
+IR47684-05-1-B;Q0
+CHUAN4;Q0
+EARLYMUTANTIAC165;Q0
+62667;Q1
+BAGANANASALAO;Q1
+CT13582-15-5-M;Q1
+IR65907-188-1-B;Q1
+IR66421-096-2-1-1;Q1
+IRAT234;Q1
+NHTA10;Q1
+IR66421-105-1-1;Q1
+PRIMAVERA;Q1
+BULUPANDAK;Q2
+GIZA171;Q2
+IAC165;Q2
+KHAODAM;Q2
+MOROBEREKAN;Q2
+NIPPONBARE_D;Q2
+63-104;Q2
+ARAGUAIA;Q2
+ARIAS;Q2
+ARROZCEBADA;Q2
+BABER;Q2
+BAKUNGH;Q2
+BENGALYVAKARINA;Q2
+BICOBRANCO;Q2
+BINULAWAN;Q2
+CAAWA/FORTUNA6;Q2
+CAIAPO;Q2
+CANAROXA;Q2
+CANELADEFERRO;Q2
+CHALOYOE;Q2
+CHAPHUMA;Q2
+CIRAD358;Q2
+CIRAD392;Q2
+CIRAD394;Q2
+CIRAD403;Q2
+CIRAD409;Q2
+CIRAD488;Q2
+CNA-7_BO_1_1_33-13-6-1;Q2
+COLOMBIA1;Q2
+CUBA65;Q2
+CUIABANA;Q2
+CURINCA;Q2
+DAM;Q2
+DAWASANRED;Q2
+DOURADOAGULHA;Q2
+DOURADOPRECOCE;Q2
+ESPERANZA;Q2
+FOHISOMOTRA;Q2
+GANIGI;Q2
+GEMJYAJYANAM;Q2
+GOGOLEMPUK;Q2
+GOGOLEMPAK;Q2
+GOMPA2;Q2
+GRAZI;Q2
+GUARANI;Q2
+GUNDILKUNING;Q2
+HAWMOM;Q2
+IAC25;Q2
+IAC47;Q2
+IDSA77;Q2
+IGUAPECATETO;Q2
+INDANE;Q2
+IR60080-46A;Q2
+IR63380-16;Q2
+IR63372-08;Q2
+IR68704-145-1-1-B;Q2
+IR71525-19-1-1;Q2
+IRAT104;Q2
+IRAT109;Q2
+IRAT112;Q2
+IRAT13;Q2
+IRAT144;Q2
+IRAT170;Q2
+IRAT177;Q2
+IRAT2;Q2
+IRAT212;Q2
+IRAT216;Q2
+IRAT257;Q2
+IRAT335;Q2
+IRAT362;Q2
+IRAT364;Q2
+IRAT366;Q2
+IRAT380;Q2
+JAOHAW;Q2
+JUMALI;Q2
+JUMULA2;Q2
+KAKANI2;Q2
+KANIRANGA;Q2
+KARASUKARASURANKASU;Q2
+KENDINGA5H;Q2
+KETANKONIR;Q2
+KETANLUMBU;Q2
+KETANMENAH;Q2
+KHAOKAPXANG;Q2
+KOMOJAMANITRA;Q2
+KU115;Q2
+LAMBAYQUE1;Q2
+LUDAN;Q2
+MAHAE;Q2
+MALAGKITPIRURUTONG;Q2
+MANANELATRA520;Q2
+MANDRIRAVINA3512;Q2
+MARAVILHA;Q2
+MITSANGANAHIJERY;Q2
+MOLOK;Q2
+NABESHI;Q2
+NHTA5;Q2
+NPE253;Q2
+OS4;Q2
+OS6;Q2
+P5589-1-1-3-P;Q2
+PACHOLINHA;Q2
+PADIBOENAR;Q2
+PADIKASALLE;Q2
+PALAWAN;Q2
+PATEBLANCMAN1;Q2
+PCT11_0_0_2_BO_1_55-1-3-1;Q2
+PCT4_SA_4_1_1076-2-4-1-5;Q2
+PEHPINUO;Q2
+PULULAPA;Q2
+RATHAL;Q2
+REKETMAUN;Q2
+RT1031-69;Q2
+SENG;Q2
+TANDUI;Q2
+TREMBESE;Q2
+TRESMESES;Q2
+TSIPALA89;Q2
+VARYLAVA90;Q2
+VARYLAVADEBETAFO;Q2
+VARYMADINIKA3566;Q2
+VARYMALADY;Q2
+VARYSOMOTRASIHANAKA;Q2
+WAB56-125;Q2
+WAB56-50;Q2
+WAB706-3-4-K4-KB-1;Q2
+YANCAOUSSA;Q2
+YANGKUMRED;Q2
+IR47686-09-01-B-1;Q2
+IR53236-275-1;Q2
+IR65261-19-1-B;Q2
+IR65907-206-4-B;Q2
+IR71524-44-1-1;Q2
+VIETNAM1;Q2
+CHUAN3;Q2