Mercurial > repos > dereeper > admixture
changeset 5:97c9c8daa3c3 draft
planemo upload
author | dereeper |
---|---|
date | Wed, 13 Apr 2016 07:51:13 -0400 |
parents | 58df6910f1c3 |
children | 3f66f32dc5d9 |
files | Admixture.pl admixture.sh admixture.xml test-data/groups |
diffstat | 4 files changed, 186 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/Admixture.pl Tue Apr 12 09:31:56 2016 -0400
+++ b/Admixture.pl Wed Apr 13 07:51:13 2016 -0400
@@ -1,7 +1,9 @@
 #!/usr/bin/perl
 use strict;
+use Switch;
 use Getopt::Long;
+use Bio::SeqIO;
 use File::Basename;
 my $usage = qq~Usage:$0 <args> [<opts>]
@@ -11,10 +13,11 @@
 -k, --kmin <K min. int>
 -m, --maxK <K max. int>
 -d, --directory <temporary directory>
+ -t, --threshold <threshold admixture proportion for group assignation>
 ~;
 $usage .= "\n";
-my ($input,$output,$kmin,$kmax,$directory);
+my ($input,$output,$kmin,$kmax,$directory,$threshold);
 GetOptions(
@@ -23,6 +26,7 @@
 "kmin=s" => \$kmin,
 "maxK=s" => \$kmax,
 "directory=s" => \$directory,
+ "threshold" => \$threshold,
 );
@@ -63,7 +67,7 @@
 my %errors;
 for (my $k = $kmin; $k <= $kmax; $k++) {
- system("admixture --cv $input.bed $k >>$directory/log.$k 2>&1");
+ system("/apps/www/sniplay.cirad.fr/tools/admixture/admixture_linux-1.23/admixture --cv $input.bed $k >>$directory/log.$k 2>&1");
 my $cv_error_line = `grep -h CV $directory/log.$k`;
 if ($cv_error_line =~/: (\d+\.*\d*)$/) {
@@ -73,6 +77,7 @@
 system("echo '\n\n====================================\n\n' >>$directory/logs");
 open(my $O2,">$basename.$k.final.Q");
+ open(my $O3,">$directory/groups.$k");
 open(my $O,"$basename.$k.Q");
 my %hash_groupes;
 my %hash_indv;
@@ -88,7 +93,7 @@
 my $ind = $individus[$i];
 for (my $j = 0; $j <$k; $j++){
 my $val = $infos[$j];
- if ($val > 0.5){$group = "Q$j";}
+ if ($val > ($threshold/100)){$group = "Q$j";}
 }
 if ($ind){
 $hash_indv{$ind} = join(" ",@infos);
@@ -102,12 +107,12 @@
 my @inds = split(",",$hash_groupes{$group}{"ind"});
 foreach my $ind(@inds){
 if ($ind =~/\w+/){
- #print $O3 "$ind;$group\n";
+ print $O3 "$ind;$group\n";
 print $O2 $ind." ".$hash_indv{$ind}. "\n";
 }
 }
 }
- #close($O3);
+ close($O3);
 close($O2);
 system("cat $basename.$k.final.Q >>$directory/outputs.Q");
@@ -141,7 +146,7 @@
 close(BEST2);
 system("cp -rf $directory/log.$best_K $directory/log");
+system("cp -rf $directory/groups.$best_K $directory/groups");
-
--- a/admixture.sh Tue Apr 12 09:31:56 2016 -0400
+++ b/admixture.sh Wed Apr 13 07:51:13 2016 -0400
@@ -8,6 +8,8 @@
 best_k_logfile=$7
 kmin=$8
 kmax=$9
+groups=${10}
+threshold_group=${11}
 directory=`dirname $0`
 mkdir tmpdir$$
@@ -16,11 +18,12 @@
 cp -rf $bim tmpdir$$/input.bim
-perl $directory/Admixture.pl -i tmpdir$$/input -o $outputs -k $kmin -m $kmax -d tmpdir$$
+perl $directory/Admixture.pl -i tmpdir$$/input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -t $threshold_group
 mv tmpdir$$/output $best_k_output
 mv tmpdir$$/log $best_k_logfile
 mv tmpdir$$/outputs.Q $outputs
 mv tmpdir$$/logs $logs
+mv tmpdir$$/groups $groups
--- a/admixture.xml Tue Apr 12 09:31:56 2016 -0400
+++ b/admixture.xml Wed Apr 13 07:51:13 2016 -0400
@@ -3,7 +3,7 @@
 <requirements>
 <requirement type="package" version="1.23">admixture</requirement>
 </requirements>
- <command interpreter="bash">./admixture.sh $bed $fam $bim $outputs $logs $best_k_output $best_k_logfile $kmin $kmax
+ <command interpreter="bash">./admixture.sh $bed $fam $bim $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group
 </command>
 <inputs>
 <param format="txt" name="bed" type="data" label="Allelic file in BED format" help="Allelic file in BED format"/>
@@ -11,9 +11,11 @@
 <param format="txt" name="bim" type="data" label="Bim file" help="Bim file"/>
 <param type="text" name="kmin" label="K min" value="2"/>
 <param type="text" name="kmax" label="K max" value="5"/>
+ <param type="text" name="threshold_group" label="Minimum admixture proportion percentage for group assignation" value="50"/>
 </inputs>
 <outputs>
 <data format="txt" name="best_k_output" label="Best K Output"/>
+ <data format="txt" name="best_k_groups" label="Best K Groups"/>
 <data format="txt" name="best_k_logfile" label="Best K Logfile"/>
 <data format="txt" name="outputs" label="All Outputs"/>
 <data format="txt" name="logs" label="All Logs"/>
@@ -25,9 +27,10 @@
 <param name="bim" value="input.bim" />
 <param name="fam" value="input.fam" />
 <param name="kmax" value="3" />
-
+ <param name="threshold_group" value="60" />
 <output name="best_k_output" file="output" />
 <output name="outputs" file="outputs.Q" />
+ <output name="best_k_groups" file="groups" />
 </test>
 </tests>
@@ -53,7 +56,6 @@
 -----
-
 ===========
 Overview:
 ===========
@@ -67,11 +69,7 @@
 .. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html
 </help>
-
 <citations>
 <citation type="doi" >10.1101/gr.094052.109</citation>
 </citations>
-
-
-
 </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/groups Wed Apr 13 07:51:13 2016 -0400
@@ -0,0 +1,166 @@
+M202;Q0
+CICIHBETON;Q0
+DANGREY;Q0
+DAVAO;Q0
+DINORADO;Q0
+GOGO;Q0
+HD1-4;Q0
+JIMBRUKJOLOWORO;Q0
+KEDAYAN;Q0
+KINANDANGPATONG;Q0
+KUROKA;Q0
+MAINTIMOLOTSY1226;Q0
+NEPHOAVANG;Q0
+NPE826;Q0
+ORYZICASABANA6;Q0
+POENOETHITAM;Q0
+SPEAKER;Q0
+YUNLU7;Q0
+VIETNAM3;Q0
+IR47684-05-1-B;Q0
+CHUAN4;Q0
+EARLYMUTANTIAC165;Q0
+62667;Q1
+BAGANANASALAO;Q1
+CT13582-15-5-M;Q1
+IR65907-188-1-B;Q1
+IR66421-096-2-1-1;Q1
+IRAT234;Q1
+NHTA10;Q1
+IR66421-105-1-1;Q1
+PRIMAVERA;Q1
+BULUPANDAK;Q2
+GIZA171;Q2
+IAC165;Q2
+KHAODAM;Q2
+MOROBEREKAN;Q2
+NIPPONBARE_D;Q2
+63-104;Q2
+ARAGUAIA;Q2
+ARIAS;Q2
+ARROZCEBADA;Q2
+BABER;Q2
+BAKUNGH;Q2
+BENGALYVAKARINA;Q2
+BICOBRANCO;Q2
+BINULAWAN;Q2
+CAAWA/FORTUNA6;Q2
+CAIAPO;Q2
+CANAROXA;Q2
+CANELADEFERRO;Q2
+CHALOYOE;Q2
+CHAPHUMA;Q2
+CIRAD358;Q2
+CIRAD392;Q2
+CIRAD394;Q2
+CIRAD403;Q2
+CIRAD409;Q2
+CIRAD488;Q2
+CNA-7_BO_1_1_33-13-6-1;Q2
+COLOMBIA1;Q2
+CUBA65;Q2
+CUIABANA;Q2
+CURINCA;Q2
+DAM;Q2
+DAWASANRED;Q2
+DOURADOAGULHA;Q2
+DOURADOPRECOCE;Q2
+ESPERANZA;Q2
+FOHISOMOTRA;Q2
+GANIGI;Q2
+GEMJYAJYANAM;Q2
+GOGOLEMPUK;Q2
+GOGOLEMPAK;Q2
+GOMPA2;Q2
+GRAZI;Q2
+GUARANI;Q2
+GUNDILKUNING;Q2
+HAWMOM;Q2
+IAC25;Q2
+IAC47;Q2
+IDSA77;Q2
+IGUAPECATETO;Q2
+INDANE;Q2
+IR60080-46A;Q2
+IR63380-16;Q2
+IR63372-08;Q2
+IR68704-145-1-1-B;Q2
+IR71525-19-1-1;Q2
+IRAT104;Q2
+IRAT109;Q2
+IRAT112;Q2
+IRAT13;Q2
+IRAT144;Q2
+IRAT170;Q2
+IRAT177;Q2
+IRAT2;Q2
+IRAT212;Q2
+IRAT216;Q2
+IRAT257;Q2
+IRAT335;Q2
+IRAT362;Q2
+IRAT364;Q2
+IRAT366;Q2
+IRAT380;Q2
+JAOHAW;Q2
+JUMALI;Q2
+JUMULA2;Q2
+KAKANI2;Q2
+KANIRANGA;Q2
+KARASUKARASURANKASU;Q2
+KENDINGA5H;Q2
+KETANKONIR;Q2
+KETANLUMBU;Q2
+KETANMENAH;Q2
+KHAOKAPXANG;Q2
+KOMOJAMANITRA;Q2
+KU115;Q2
+LAMBAYQUE1;Q2
+LUDAN;Q2
+MAHAE;Q2
+MALAGKITPIRURUTONG;Q2
+MANANELATRA520;Q2
+MANDRIRAVINA3512;Q2
+MARAVILHA;Q2
+MITSANGANAHIJERY;Q2
+MOLOK;Q2
+NABESHI;Q2
+NHTA5;Q2
+NPE253;Q2
+OS4;Q2
+OS6;Q2
+P5589-1-1-3-P;Q2
+PACHOLINHA;Q2
+PADIBOENAR;Q2
+PADIKASALLE;Q2
+PALAWAN;Q2
+PATEBLANCMAN1;Q2
+PCT11_0_0_2_BO_1_55-1-3-1;Q2
+PCT4_SA_4_1_1076-2-4-1-5;Q2
+PEHPINUO;Q2
+PULULAPA;Q2
+RATHAL;Q2
+REKETMAUN;Q2
+RT1031-69;Q2
+SENG;Q2
+TANDUI;Q2
+TREMBESE;Q2
+TRESMESES;Q2
+TSIPALA89;Q2
+VARYLAVA90;Q2
+VARYLAVADEBETAFO;Q2
+VARYMADINIKA3566;Q2
+VARYMALADY;Q2
+VARYSOMOTRASIHANAKA;Q2
+WAB56-125;Q2
+WAB56-50;Q2
+WAB706-3-4-K4-KB-1;Q2
+YANCAOUSSA;Q2
+YANGKUMRED;Q2
+IR47686-09-01-B-1;Q2
+IR53236-275-1;Q2
+IR65261-19-1-B;Q2
+IR65907-206-4-B;Q2
+IR71524-44-1-1;Q2
+VIETNAM1;Q2
+CHUAN3;Q2