Mercurial > repos > dereeper > admixture
changeset 1:78b98120be91 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 02 Jul 2015 04:08:56 -0400 |
parents | 781ac6e7a3a1 |
children | 92ddf4f7c0c6 |
files | Admixture.pl admixture.sh admixture.xml tool-data/tool_dependencies.xml tool_dependencies.xml transpose.awk |
diffstat | 6 files changed, 6 insertions(+), 273 deletions(-) [+] |
line wrap: on
line diff
--- a/Admixture.pl Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input <input HAPMAP> - -o, --output <output> - -k, --kmin <K min. int> - -m, --maxK <K max. int> - -d, --directory <temporary directory> - -p, --path <path to executables> -~; -$usage .= "\n"; - -my ($input,$output,$kmin,$kmax,$directory,$path); - - -GetOptions( - "input=s" => \$input, - "output=s" => \$output, - "kmin=s" => \$kmin, - "maxK=s" => \$kmax, - "directory=s" => \$directory, - "path=s" => \$path -); - - -die $usage - if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); - -if ($kmin =~/^(\d+)\s*$/){ - $kmin = $1; -} -else{ - die "Error: kmin must be an integer\n"; -} -if ($kmax =~/^(\d+)\s*$/){ - $kmax = $1; -} -else{ - die "Error: kmax must be an integer\n"; -} - - -###################### -# create map file -###################### -open(my $M,">$directory/input.map"); -open(my $H,$input); -<$H>; -while(<$H>) -{ - my @infos = split(/\t/,$_); - print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; -} -close($H); -close($M); - -###################### -# create ped file -###################### -system("$path/transpose.awk $input >$directory/input.ped.2"); - -open(my $P,">$directory/input.ped"); -open(my $P2,"$directory/input.ped.2"); -my $n = 0; -my $ind_num = 0; -my @individus; -while(<$P2>) -{ - $n++; - if ($n > 11) - { - my $line = $_; - $line =~s/N/0/g; - if (/^([^\s]+)\s+(.*)$/) - { - $ind_num++; - my $ind = $1; - push(@individus,$ind); - my $genoyping_line = $2; - print $P "$ind $ind_num 0 0 1 2"; - my @genotypes = split(/\s/,$genoyping_line); - foreach my $genotype(@genotypes) - { - $genotype =~s/N/0/g; - my @alleles = split("",$genotype); - print $P " " . join(" ",@alleles); - } - - print $P "\n"; - } - } -} -close($P2); -close($P); - -unlink("$directory/input.ped.2"); - -system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); - - -################################### -# launch admixture for different K -################################### -my %errors; -for (my $k = $kmin; $k <= $kmax; $k++) -{ - system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); - my $cv_error_line = `grep -h CV $directory/log.$k`; - if ($cv_error_line =~/: (\d+\.*\d*)$/) - { - $errors{$1} = $k; - } - system("cat $directory/log.$k >>$directory/logs"); - system("echo '\n\n====================================\n\n' >>$directory/logs"); - system("cat out.$k.Q >>$directory/outputs.Q"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); - system("cat out.$k.P >>$directory/outputs.P"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); -} - -my @sorted_errors = sort {$a<=>$b} keys(%errors); -my $best_K = $errors{@sorted_errors[0]}; - - -#system("cp -rf out.$best_K.Q $directory/output"); - -open(BEST1,"out.$best_K.Q"); -open(BEST2,">$directory/output"); -print BEST2 "<Covariate>\n"; -print BEST2 "<Trait>"; -for (my $j=1;$j<=$best_K;$j++) -{ - print BEST2 " Q" . $j; -} -print BEST2 "\n"; -my $i = 0; -while(<BEST1>) -{ - my $line = $_; - $line =~s/ /\t/g; - my $ind = $individus[$i]; - print BEST2 "$ind "; - print BEST2 $line; - $i++; -} -close(BEST1); -close(BEST2); - -system("cp -rf $directory/log.$best_K $directory/log"); - - - -
--- a/admixture.sh Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/bin/bash -input=$1 -outputs=$2 -logs=$3 -best_k_output=$4 -best_k_logfile=$5 -kmin=$6 -kmax=$7 - -directory=`dirname $0` -mkdir tmpdir$$ -cp -rf $input tmpdir$$/input - -/usr/bin/perl $directory/Admixture.pl -i $input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -p $directory - -mv tmpdir$$/output $best_k_output -mv tmpdir$$/log $best_k_logfile -mv tmpdir$$/outputs.Q $outputs -mv tmpdir$$/logs $logs - -
--- a/admixture.xml Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -<tool id="admixture" name="Admixture" version="1.23"> - <description>a population structure from large SNP genotype datasets</description> - <requirements> - <requirement type="package" version="1.07">plink</requirement> - <requirement type="package" version="1.23">admixture</requirement> - </requirements> - <command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax - </command> - <inputs> - <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> - <param type="text" name="kmin" label="K min" value="1"/> - <param type="text" name="kmax" label="K max" value="5"/> - </inputs> - <outputs> - <data format="txt" name="best_k_output" label="Best K Output"/> - <data format="txt" name="best_k_logfile" label="Best K Logfile"/> - <data format="txt" name="outputs" label="All Outputs"/> - <data format="txt" name="logs" label="All Logs"/> - </outputs> - <help> - - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**Admixture version 1.23** - ------ - -============== - Please cite: -============== - -"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655{1664, 2009. - ------ - -=========== - Overview: -=========== - -ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study). - ------ - -For further informations, please visite the Admixture_ website. - - -.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html - </help> - -</tool>
--- a/tool-data/tool_dependencies.xml Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="plink" version="1.07"> - <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> - <package name="admixture" version="1.23"> - <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> -</tool_dependency> - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jul 02 04:08:56 2015 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="admixture" version="1.23"> + <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> +</tool_dependency>
--- a/transpose.awk Fri Feb 20 10:09:18 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#!/usr/bin/gawk -f - -BEGIN { - max_x =0; - max_y =0; -} - -{ - max_y++; - for( i=1; i<=NF; i++ ) - { - if (i>max_x) max_x=i; - A[i,max_y] = $i; - } -} - -END { - for ( x=1; x<=max_x; x++ ) - { - for ( y=1; y<=max_y; y++ ) - { - if ( (x,y) in A ) printf "%s",A[x,y]; - if ( y!=max_y ) printf " "; - } - printf "\n"; - } -}