# HG changeset patch # User dereeper # Date 1546964363 18000 # Node ID c94c31bde7735a7254dbf1263ad0463324ddb665 # Parent 22aee300afdf04985283c4951d8ac8ce7188956b Uploaded diff -r 22aee300afdf -r c94c31bde773 Snmf.pl --- a/Snmf.pl Wed Apr 18 05:44:40 2018 -0400 +++ b/Snmf.pl Tue Jan 08 11:19:23 2019 -0500 @@ -2,7 +2,6 @@ use strict; use Getopt::Long; -use Bio::SeqIO; my $usage = qq~Usage:$0 [] where are: @@ -34,7 +33,7 @@ my $PLINK_EXE = "plink"; -system("$PLINK_EXE --vcf $input --allow-extra-chr --recode vcf-fid --out $directory/input >>$directory/logs 2>&1"); +system("$PLINK_EXE --vcf $input --allow-extra-chr --recode-vcf --const-fid --out $directory/input >>$directory/logs 2>&1"); system("vcf2geno $directory/input.vcf $directory/polymorphisms.geno >>$directory/logs 2>&1"); @@ -47,6 +46,8 @@ # launch admixture for different K ################################### my %errors; +open(E,">$directory/entropy"); +print E "K, number of ancestal populations Cross-entropy\n"; for (my $k = $kmin; $k <= $kmax; $k++) { system("sNMF -x $directory/polymorphisms.geno -K $k -c >>$directory/log.$k 2>&1"); @@ -64,9 +65,7 @@ } close($LOG); - open(E,">>$directory/entropy"); - print E "K=$k $ent\n"; - close(E); + print E "K=$k $ent\n"; print $O2 "Indiv"; print $O3 "Indiv;Group\n"; @@ -115,7 +114,7 @@ system("cat $directory/out.$k.final.Q >>$directory/outputs.Q"); system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); } - +close(E); my @sorted_errors = sort {$a<=>$b} keys(%errors); my $best_K = $errors{@sorted_errors[0]}; @@ -125,4 +124,27 @@ system("cp -rf $directory/log.$best_K $directory/log"); system("cp -rf $directory/out.$best_K.group $directory/groups"); +#exit; + + +open(BEST1,"$directory/out.$best_K.final.Q"); +; +open(BEST2,">$directory/output"); +print BEST2 "\n"; +print BEST2 ""; +for (my $j=1;$j<=$best_K;$j++) +{ + print BEST2 " Q" . $j; +} +print BEST2 "\n"; +my $i = 0; +while() +{ + my $line = $_; + $line =~s/ /\t/g; + print BEST2 $line; + $i++; +} +close(BEST1); +close(BEST2); diff -r 22aee300afdf -r c94c31bde773 snmf.sh --- a/snmf.sh Wed Apr 18 05:44:40 2018 -0400 +++ b/snmf.sh Tue Jan 08 11:19:23 2019 -0500 @@ -8,6 +8,7 @@ kmax=$7 groups=$8 threshold_group=$9 +entropy=${10} directory=`dirname $0` mkdir tmpdir$$ @@ -20,5 +21,6 @@ mv tmpdir$$/outputs.Q $outputs mv tmpdir$$/logs $logs mv tmpdir$$/groups $groups +mv tmpdir$$/entropy $entropy diff -r 22aee300afdf -r c94c31bde773 snmf.xml --- a/snmf.xml Wed Apr 18 05:44:40 2018 -0400 +++ b/snmf.xml Tue Jan 08 11:19:23 2019 -0500 @@ -10,7 +10,7 @@ - ./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group + ./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group $entropy @@ -23,6 +23,7 @@ +