changeset 8:c94c31bde773 draft

Uploaded
author dereeper
date Tue, 08 Jan 2019 11:19:23 -0500
parents 22aee300afdf
children e44eb2ba80ec
files Snmf.pl snmf.sh snmf.xml
diffstat 3 files changed, 32 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/Snmf.pl	Wed Apr 18 05:44:40 2018 -0400
+++ b/Snmf.pl	Tue Jan 08 11:19:23 2019 -0500
@@ -2,7 +2,6 @@
 
 use strict;
 use Getopt::Long;
-use Bio::SeqIO;
 
 my $usage = qq~Usage:$0 <args> [<opts>]
 where <args> are:
@@ -34,7 +33,7 @@
 
 my $PLINK_EXE = "plink";
 
-system("$PLINK_EXE --vcf $input --allow-extra-chr --recode vcf-fid --out $directory/input >>$directory/logs 2>&1");
+system("$PLINK_EXE --vcf $input --allow-extra-chr --recode-vcf --const-fid --out $directory/input >>$directory/logs 2>&1");
 
 system("vcf2geno $directory/input.vcf $directory/polymorphisms.geno >>$directory/logs 2>&1");
 
@@ -47,6 +46,8 @@
 # launch admixture for different K
 ###################################
 my %errors;
+open(E,">$directory/entropy");
+print E "K, number of ancestal populations	Cross-entropy\n";
 for (my $k = $kmin; $k <= $kmax; $k++)
 {
 	system("sNMF -x $directory/polymorphisms.geno -K $k -c >>$directory/log.$k 2>&1");
@@ -64,9 +65,7 @@
 	}
 	close($LOG);
 
-	open(E,">>$directory/entropy");
-	print E "K=$k $ent\n";
-	close(E);
+	print E "K=$k	$ent\n";
 
 	print $O2 "Indiv";
 	print $O3 "Indiv;Group\n";
@@ -115,7 +114,7 @@
 	system("cat $directory/out.$k.final.Q >>$directory/outputs.Q");
 	system("echo '\n\n====================================\n\n' >>$directory/outputs.Q");
 }
-
+close(E);
 my @sorted_errors = sort {$a<=>$b} keys(%errors);
 my $best_K = $errors{@sorted_errors[0]};
 
@@ -125,4 +124,27 @@
 system("cp -rf $directory/log.$best_K $directory/log");
 system("cp -rf $directory/out.$best_K.group $directory/groups");
 
+#exit;
 
+
+
+open(BEST1,"$directory/out.$best_K.final.Q");
+<BEST1>;
+open(BEST2,">$directory/output");
+print BEST2 "<Covariate>\n";
+print BEST2 "<Trait>";
+for (my $j=1;$j<=$best_K;$j++)
+{
+        print BEST2 "	Q" . $j;
+}
+print BEST2 "\n";
+my $i = 0;
+while(<BEST1>)
+{
+        my $line = $_;
+        $line =~s/ /\t/g;
+        print BEST2 $line;
+        $i++;
+}
+close(BEST1);
+close(BEST2);
--- a/snmf.sh	Wed Apr 18 05:44:40 2018 -0400
+++ b/snmf.sh	Tue Jan 08 11:19:23 2019 -0500
@@ -8,6 +8,7 @@
 kmax=$7
 groups=$8
 threshold_group=$9
+entropy=${10}
 
 directory=`dirname $0`
 mkdir tmpdir$$
@@ -20,5 +21,6 @@
 mv tmpdir$$/outputs.Q $outputs
 mv tmpdir$$/logs $logs
 mv tmpdir$$/groups $groups
+mv tmpdir$$/entropy $entropy
 
 
--- a/snmf.xml	Wed Apr 18 05:44:40 2018 -0400
+++ b/snmf.xml	Tue Jan 08 11:19:23 2019 -0500
@@ -10,7 +10,7 @@
                 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
                 <exit_code range="1:" level="fatal" />
         </stdio>
-	<command interpreter="bash">./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group
+	<command interpreter="bash">./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group $entropy
         </command>
 	<inputs>
 		<param format="vcf" name="vcf" type="data" label="VCF file" help="VCF file"/>
@@ -23,6 +23,7 @@
 		<data format="txt" name="best_k_groups" label="Best K Groups"/>
 		<data format="txt" name="best_k_logfile" label="Best K Logfile"/>
 		<data format="txt" name="outputs" label="Structure by sNMF"/>
+		<data format="tabular" name="entropy" label="Cross-entropy values"/>
 		<data format="txt" name="logs" label="All Logs"/>
 	</outputs>