changeset 1:78b98120be91 draft

Uploaded
author dereeper
date Thu, 02 Jul 2015 04:08:56 -0400
parents 781ac6e7a3a1
children 92ddf4f7c0c6
files Admixture.pl admixture.sh admixture.xml tool-data/tool_dependencies.xml tool_dependencies.xml transpose.awk
diffstat 6 files changed, 6 insertions(+), 273 deletions(-) [+]
line wrap: on
line diff
--- a/Admixture.pl	Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,159 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Switch;
-use Getopt::Long;
-use Bio::SeqIO;
-
-my $usage = qq~Usage:$0 <args> [<opts>]
-where <args> are:
-    -i, --input         <input HAPMAP>
-    -o, --output        <output>
-    -k, --kmin          <K min. int>
-    -m, --maxK          <K max. int>
-    -d, --directory     <temporary directory>
-    -p, --path          <path to executables>
-~;
-$usage .= "\n";
-
-my ($input,$output,$kmin,$kmax,$directory,$path);
-
-
-GetOptions(
-	"input=s"      => \$input,
-	"output=s"     => \$output,
-	"kmin=s"       => \$kmin,
-	"maxK=s"       => \$kmax,
-	"directory=s"  => \$directory,
-	"path=s"       => \$path
-);
-
-
-die $usage
-  if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path);
-
-if ($kmin =~/^(\d+)\s*$/){
-        $kmin = $1;
-}
-else{
-        die "Error: kmin must be an integer\n";
-}
-if ($kmax =~/^(\d+)\s*$/){
-        $kmax = $1;
-}
-else{
-        die "Error: kmax must be an integer\n";
-}
-
-  
-######################
-# create map file
-######################
-open(my $M,">$directory/input.map");
-open(my $H,$input);
-<$H>;
-while(<$H>)
-{
-	my @infos = split(/\t/,$_);
-	print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n";
-}
-close($H);
-close($M);
-
-######################
-# create ped file
-######################
-system("$path/transpose.awk $input >$directory/input.ped.2");
-
-open(my $P,">$directory/input.ped");
-open(my $P2,"$directory/input.ped.2");
-my $n = 0;
-my $ind_num = 0;
-my @individus;
-while(<$P2>)
-{
-	$n++;
-	if ($n > 11)
-	{
-		my $line = $_;
-		$line =~s/N/0/g;
-		if (/^([^\s]+)\s+(.*)$/)
-		{
-			$ind_num++;
-			my $ind = $1;
-			push(@individus,$ind);
-			my $genoyping_line = $2;
-			print $P "$ind	$ind_num	0	0	1	2";
-			my @genotypes = split(/\s/,$genoyping_line);
-			foreach my $genotype(@genotypes)
-			{
-				$genotype =~s/N/0/g;
-				my @alleles = split("",$genotype);
-				print $P "	" . join(" ",@alleles);
-			}
-			
-			print $P "\n";
-		}
-	}
-}
-close($P2);
-close($P);
-
-unlink("$directory/input.ped.2");
-
-system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1");
-
-
-###################################
-# launch admixture for different K
-###################################
-my %errors;
-for (my $k = $kmin; $k <= $kmax; $k++)
-{
-	system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1");
-	my $cv_error_line = `grep -h CV $directory/log.$k`;
-	if ($cv_error_line =~/: (\d+\.*\d*)$/)
-	{
-		$errors{$1} = $k;
-	}
-	system("cat $directory/log.$k >>$directory/logs");
-	system("echo '\n\n====================================\n\n' >>$directory/logs");
-	system("cat out.$k.Q >>$directory/outputs.Q");
-	system("echo '\n\n====================================\n\n' >>$directory/outputs.Q");
-	system("cat out.$k.P >>$directory/outputs.P");
-	system("echo '\n\n====================================\n\n' >>$directory/outputs.P");
-}
-
-my @sorted_errors = sort {$a<=>$b} keys(%errors);
-my $best_K = $errors{@sorted_errors[0]};
-
-
-#system("cp -rf out.$best_K.Q $directory/output");
-
-open(BEST1,"out.$best_K.Q");
-open(BEST2,">$directory/output");
-print BEST2 "<Covariate>\n";
-print BEST2 "<Trait>";
-for (my $j=1;$j<=$best_K;$j++)
-{
-	print BEST2 "	Q" . $j;
-}
-print BEST2 "\n";
-my $i = 0;
-while(<BEST1>)
-{
-	my $line = $_;
-	$line =~s/ /\t/g;
-	my $ind = $individus[$i];
-	print BEST2 "$ind	";
-	print BEST2 $line;
-	$i++;
-}
-close(BEST1);
-close(BEST2);
-
-system("cp -rf $directory/log.$best_K $directory/log");
-
-
-
-
--- a/admixture.sh	Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/bin/bash
-input=$1
-outputs=$2
-logs=$3
-best_k_output=$4
-best_k_logfile=$5
-kmin=$6
-kmax=$7
-
-directory=`dirname $0`
-mkdir tmpdir$$
-cp -rf $input tmpdir$$/input
- 
-/usr/bin/perl $directory/Admixture.pl -i $input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -p $directory
-
-mv tmpdir$$/output $best_k_output
-mv tmpdir$$/log $best_k_logfile
-mv tmpdir$$/outputs.Q $outputs
-mv tmpdir$$/logs $logs
-
-
--- a/admixture.xml	Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<tool id="admixture" name="Admixture" version="1.23">
-	<description>a population structure from large SNP genotype datasets</description>
-	<requirements>
-		<requirement type="package" version="1.07">plink</requirement>
-		<requirement type="package" version="1.23">admixture</requirement>
-	</requirements>
-	<command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax
-    </command>
-	<inputs>
-		<param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/>
-		<param type="text" name="kmin" label="K min" value="1"/>
-		<param type="text" name="kmax" label="K max" value="5"/>
-	</inputs>
-	<outputs>
-		<data format="txt" name="best_k_output" label="Best K Output"/>
-		<data format="txt" name="best_k_logfile" label="Best K Logfile"/>
-		<data format="txt" name="outputs" label="All Outputs"/>
-		<data format="txt" name="logs" label="All Logs"/>
-	</outputs>
-	<help>
-	
-
-.. class:: infomark
-
-**Program encapsulated in Galaxy by Southgreen**
-
-.. class:: infomark
-
-**Admixture version 1.23**
-
------
-
-==============
- Please cite: 
-==============
-
-"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655{1664, 2009.
-
------
-
-===========
- Overview:
-===========
-
-ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study).
-
------
-
-For further informations, please visite the Admixture_ website.
-
-
-.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html
-	</help>
-
-</tool>
--- a/tool-data/tool_dependencies.xml	Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="plink" version="1.07">
-        <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
-    </package>
-    <package name="admixture" version="1.23">
-        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
-    </package>
-</tool_dependency>
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jul 02 04:08:56 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="admixture" version="1.23">
+        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+</tool_dependency>
--- a/transpose.awk	Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-#!/usr/bin/gawk -f
-
-BEGIN {
-    max_x =0;
-    max_y =0;
-}
-
-{
-    max_y++;
-    for( i=1; i<=NF; i++ )
-    {
-        if (i>max_x) max_x=i;
-        A[i,max_y] = $i;
-    }
-}
-
-END {
-    for ( x=1; x<=max_x; x++ )
-    {
-        for ( y=1; y<=max_y; y++ )
-        {
-            if ( (x,y) in A ) printf "%s",A[x,y];
-            if ( y!=max_y ) printf " ";
-        }
-        printf "\n";
-    }
-}