# HG changeset patch # User dereeper # Date 1435824536 14400 # Node ID 78b98120be91d24bd19930ffe3f0dae9502d8a69 # Parent 781ac6e7a3a1c30328cbd118421c0d28d6845416 Uploaded diff -r 781ac6e7a3a1 -r 78b98120be91 Admixture.pl --- a/Admixture.pl Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 [] -where are: - -i, --input - -o, --output - -k, --kmin - -m, --maxK - -d, --directory - -p, --path -~; -$usage .= "\n"; - -my ($input,$output,$kmin,$kmax,$directory,$path); - - -GetOptions( - "input=s" => \$input, - "output=s" => \$output, - "kmin=s" => \$kmin, - "maxK=s" => \$kmax, - "directory=s" => \$directory, - "path=s" => \$path -); - - -die $usage - if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); - -if ($kmin =~/^(\d+)\s*$/){ - $kmin = $1; -} -else{ - die "Error: kmin must be an integer\n"; -} -if ($kmax =~/^(\d+)\s*$/){ - $kmax = $1; -} -else{ - die "Error: kmax must be an integer\n"; -} - - -###################### -# create map file -###################### -open(my $M,">$directory/input.map"); -open(my $H,$input); -<$H>; -while(<$H>) -{ - my @infos = split(/\t/,$_); - print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; -} -close($H); -close($M); - -###################### -# create ped file -###################### -system("$path/transpose.awk $input >$directory/input.ped.2"); - -open(my $P,">$directory/input.ped"); -open(my $P2,"$directory/input.ped.2"); -my $n = 0; -my $ind_num = 0; -my @individus; -while(<$P2>) -{ - $n++; - if ($n > 11) - { - my $line = $_; - $line =~s/N/0/g; - if (/^([^\s]+)\s+(.*)$/) - { - $ind_num++; - my $ind = $1; - push(@individus,$ind); - my $genoyping_line = $2; - print $P "$ind $ind_num 0 0 1 2"; - my @genotypes = split(/\s/,$genoyping_line); - foreach my $genotype(@genotypes) - { - $genotype =~s/N/0/g; - my @alleles = split("",$genotype); - print $P " " . join(" ",@alleles); - } - - print $P "\n"; - } - } -} -close($P2); -close($P); - -unlink("$directory/input.ped.2"); - -system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); - - -################################### -# launch admixture for different K -################################### -my %errors; -for (my $k = $kmin; $k <= $kmax; $k++) -{ - system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); - my $cv_error_line = `grep -h CV $directory/log.$k`; - if ($cv_error_line =~/: (\d+\.*\d*)$/) - { - $errors{$1} = $k; - } - system("cat $directory/log.$k >>$directory/logs"); - system("echo '\n\n====================================\n\n' >>$directory/logs"); - system("cat out.$k.Q >>$directory/outputs.Q"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); - system("cat out.$k.P >>$directory/outputs.P"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); -} - -my @sorted_errors = sort {$a<=>$b} keys(%errors); -my $best_K = $errors{@sorted_errors[0]}; - - -#system("cp -rf out.$best_K.Q $directory/output"); - -open(BEST1,"out.$best_K.Q"); -open(BEST2,">$directory/output"); -print BEST2 "\n"; -print BEST2 ""; -for (my $j=1;$j<=$best_K;$j++) -{ - print BEST2 " Q" . $j; -} -print BEST2 "\n"; -my $i = 0; -while() -{ - my $line = $_; - $line =~s/ /\t/g; - my $ind = $individus[$i]; - print BEST2 "$ind "; - print BEST2 $line; - $i++; -} -close(BEST1); -close(BEST2); - -system("cp -rf $directory/log.$best_K $directory/log"); - - - - diff -r 781ac6e7a3a1 -r 78b98120be91 admixture.sh --- a/admixture.sh Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/bin/bash -input=$1 -outputs=$2 -logs=$3 -best_k_output=$4 -best_k_logfile=$5 -kmin=$6 -kmax=$7 - -directory=`dirname $0` -mkdir tmpdir$$ -cp -rf $input tmpdir$$/input - -/usr/bin/perl $directory/Admixture.pl -i $input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -p $directory - -mv tmpdir$$/output $best_k_output -mv tmpdir$$/log $best_k_logfile -mv tmpdir$$/outputs.Q $outputs -mv tmpdir$$/logs $logs - - diff -r 781ac6e7a3a1 -r 78b98120be91 admixture.xml --- a/admixture.xml Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ - - a population structure from large SNP genotype datasets - - plink - admixture - - ./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax - - - - - - - - - - - - - - - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**Admixture version 1.23** - ------ - -============== - Please cite: -============== - -"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655{1664, 2009. - ------ - -=========== - Overview: -=========== - -ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study). - ------ - -For further informations, please visite the Admixture_ website. - - -.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html - - - diff -r 781ac6e7a3a1 -r 78b98120be91 tool-data/tool_dependencies.xml --- a/tool-data/tool_dependencies.xml Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ - - - - - - - - - - - diff -r 781ac6e7a3a1 -r 78b98120be91 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jul 02 04:08:56 2015 -0400 @@ -0,0 +1,6 @@ + + + + + + diff -r 781ac6e7a3a1 -r 78b98120be91 transpose.awk --- a/transpose.awk Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#!/usr/bin/gawk -f - -BEGIN { - max_x =0; - max_y =0; -} - -{ - max_y++; - for( i=1; i<=NF; i++ ) - { - if (i>max_x) max_x=i; - A[i,max_y] = $i; - } -} - -END { - for ( x=1; x<=max_x; x++ ) - { - for ( y=1; y<=max_y; y++ ) - { - if ( (x,y) in A ) printf "%s",A[x,y]; - if ( y!=max_y ) printf " "; - } - printf "\n"; - } -}