Mercurial > repos > miller-lab > genome_diversity
changeset 6:626b714f72bb
add gd_ploteig
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 10 Apr 2012 13:51:19 -0400 |
parents | 8a1147101f85 |
children | e29f4d801bb0 |
files | README genome_diversity/Makefile genome_diversity/bin/gd_ploteig |
diffstat | 3 files changed, 173 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/README Tue Apr 10 12:49:47 2012 -0400 +++ b/README Tue Apr 10 13:51:19 2012 -0400 @@ -7,7 +7,7 @@ networkx (we used version 1.6) http://pypi.python.org/packages/source/n/networkx/ And the following software: - ADMIXTURE (we used version 1.22 ) http://www.genetics.ucla.edu/software/admixture/ + ADMIXTURE (we used version 1.22) http://www.genetics.ucla.edu/software/admixture/ EIGENSOFT (we used version 3.0) http://genepath.med.harvard.edu/~reich/Software.htm PHAST (we used version 1.2.1) http://compgen.bscb.cornell.edu/phast/ QuickTree (we used version 1.1) http://www.sanger.ac.uk/resources/software/quicktree/
--- a/genome_diversity/Makefile Tue Apr 10 12:49:47 2012 -0400 +++ b/genome_diversity/Makefile Tue Apr 10 13:51:19 2012 -0400 @@ -3,7 +3,6 @@ clean: cd src && make clean - rm -rf bin install: cd src && make install
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/bin/gd_ploteig Tue Apr 10 13:51:19 2012 -0400 @@ -0,0 +1,172 @@ +#!/usr/bin/env perl + +### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] [-y] [-z sep] +use Getopt::Std ; +use File::Basename ; +use warnings ; + +## pops : separated -x = make postscript and pdf -z use another separator +## -k keep intermediate files +## NEW if pops is a file names are read one per line + +getopts('i:o:p:c:s:d:z:t:xky',\%opts) ; +$postscmode = $opts{"x"} ; +$oldkeystyle = $opts{"y"} ; +$kflag = $opts{"k"} ; +$keepflag = 1 if ($kflag) ; +$keepflag = 1 unless ($postscmode) ; + +$zsep = ":" ; +if (defined $opts{"z"}) { + $zsep = $opts{"z"} ; + $zsep = "\+" if ($zsep eq "+") ; +} + +$title = "" ; +if (defined $opts{"t"}) { + $title = $opts{"t"} ; +} +if (defined $opts{"i"}) { + $infile = $opts{"i"} ; +} +else { + usage() ; + exit 0 ; +} +open (FF, $infile) || die "can't open $infile\n" ; +@L = (<FF>) ; +chomp @L ; +$nf = 0 ; +foreach $line (@L) { + next if ($line =~ /^\s+#/) ; + @Z = split " ", $line ; + $x = @Z ; + $nf = $x if ($nf < $x) ; +} +printf "## number of fields: %d\n", $nf ; +$popcol = $nf-1 ; + + +if (defined $opts{"p"}) { + $pops = $opts{"p"} ; +} +else { + die "p parameter compulsory\n" ; +} + +$popsname = setpops ($pops) ; +print "$popsname\n" ; + +$c1 = 1; $c2 =2 ; +if (defined $opts{"c"}) { + $cols = $opts{"c"} ; + ($c1, $c2) = split ":", $cols ; + die "bad c param: $cols\n" unless (defined $cols) ; +} + +$stem = "$infile.$c1:$c2" ; +if (defined $opts{"s"}) { + $stem = $opts{"s"} ; +} +$gnfile = "$stem.$popsname.xtxt" ; + +if (defined $opts{"o"}) { + $gnfile = $opts{"o"} ; +} + +@T = () ; ## trash +open (GG, ">$gnfile") || die "can't open $gnfile\n" ; +print GG "## " unless ($postscmode) ; +print GG "set terminal postscript color\n" ; +print GG "set style line 2 lc rgbcolor \"#376600\"\n"; +print GG "set style line 11 lc rgbcolor \"#376600\"\n"; +print GG "set style line 20 lc rgbcolor \"#376600\"\n"; +print GG "set style line 29 lc rgbcolor \"#376600\"\n"; +print GG "set style line 6 lc rgbcolor \"#FFCC00\"\n"; +print GG "set style line 15 lc rgbcolor \"#FFCC00\"\n"; +print GG "set style line 24 lc rgbcolor \"#FFCC00\"\n"; +print GG "set style increment user\n"; +print GG "set title \"$title\" \n" ; +print GG "set key outside\n" unless ($oldkeystyle) ; +print GG "set xlabel \"eigenvector $c1\" \n" ; +print GG "set ylabel \"eigenvector $c2\" \n" ; +print GG "plot " ; +$np = @P ; +$lastpop = $P[$np-1] ; +$d1 = $c1+1 ; +$d2 = $c2+1 ; +foreach $pop (@P) { + $dfile = "$stem:$pop" ; + push @T, $dfile ; + print GG " \"$dfile\" using $d1:$d2 title \"$pop\" " ; + print GG ", \\\n" unless ($pop eq $lastpop) ; + open (YY, ">$dfile") || die "can't open $dfile\n" ; + foreach $line (@L) { + next if ($line =~ /^\s+#/) ; + @Z = split " ", $line ; + next unless (defined $Z[$popcol]) ; + next unless ($Z[$popcol] eq $pop) ; + print YY "$line\n" ; + } + close YY ; +} +print GG "\n" ; +print GG "## " if ($postscmode) ; +print GG "pause 9999\n" ; +close GG ; + +if ($postscmode) { +$psfile = "$stem.ps" ; + + if ($gnfile =~ /xtxt/) { + $psfile = $gnfile ; + $psfile =~ s/xtxt/ps/ ; + } +system "gnuplot < $gnfile > $psfile" ; +#system "fixgreen $psfile" ; +system "ps2pdf $psfile " ; +} +unlink (@T) unless $keepflag ; + +sub usage { + +print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k]\n" ; +print "-i eigfile input file first col indiv-id last col population\n" ; +print "## as output by smartpca in outputvecs \n" ; +print "-c a:b a, b columns to plot. 1:2 would be common and leading 2 eigenvectors\n" ; +print "-p pops Populations to plot. : delimited. eg -p Bantu:San:French\n" ; +print "## pops can also be a filename. List populations 1 per line\n" ; +print "[-s stem] stem will start various output files\n" ; +print "[-o ofile] ofile will be gnuplot control file. Should have xtxt suffix\n"; +print "[-x] make ps and pdf files\n" ; +print "[-k] keep various intermediate files although -x set\n" ; +print "## necessary if .xtxt file is to be hand edited\n" ; +print "[-y] put key at top right inside box (old mode)\n" ; +print "[-t] title (legend)\n" ; + +print "The xtxt file is a gnuplot file and can be easily hand edited. Intermediate files +needed if you want to make your own plot\n" ; + +} +sub setpops { + my ($pops) = @_ ; + local (@a, $d, $b, $e) ; + + if (-e $pops) { + open (FF1, $pops) || die "can't open $pops\n" ; + @P = () ; + foreach $line (<FF1>) { + ($a) = split " ", $line ; + next unless (defined $a) ; + next if ($a =~ /\#/) ; + push @P, $a ; + } + $out = join ":", @P ; + print "## pops: $out\n" ; + ($b, $d , $e) = fileparse($pops) ; + return $b ; + } + @P = split $zsep, $pops ; + return $pops ; + +}