annotate tools/human_genome_variation/lped_to_geno.pl @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env perl
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 use strict;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 use warnings;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #convert from a MAP and PED file to a genotype file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 #assumes not many SNPs but lots of individuals
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 # transposed formats are used when lots of SNPs (TPED, TFAM)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 if (!@ARGV or scalar @ARGV ne 2) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 print "usage: lped_to_geno.pl infile.map infile.ped > outfile\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 exit;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 my $map = shift @ARGV;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 my $ped = shift @ARGV;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 my @snp; #array to hold SNPs from map file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 open(FH, $map) or die "Couldn't open $map, $!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 while (<FH>) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 chomp;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 my @f = split(/\s+/); #3 or 4 columns
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 #chrom ID [distance|morgans] position
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 if (!exists $f[3]) { $f[3] = $f[2]; } #only 3 columns
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 #have to leave in so know which to skip later
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 #if ($f[3] < 0) { next; } #way of excluding SNPs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 #if ($f[0] eq '0') { next; } #unplaced SNP
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 $f[0] = "chr$f[0]";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 push(@snp, "$f[0]:$f[3]:$f[1]");
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 close FH or die "Couldn't finish $map, $!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 #rows are individuals, columns are SNPs (7 & up)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 #need to print row per SNP
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 my @allele; #alleles to go with @snp
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 my @pheno; #marker for phenotype
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 open(FH, $ped) or die "Couldn't open $ped, $!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 while (<FH>) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 chomp;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 my @f = split(/\s+/);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 if (!defined $f[5]) { die "ERROR undefined phenotype $f[0] $f[1] $f[2] $f[3] $f[4]\n"; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 push(@pheno, $f[5]);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 my $j = 0;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 for(my $i = 6; $i< $#f; $i+=2) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 if (!$allele[$j]) { $allele[$j] = ''; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 #can be ACTG or 1234 (for haploview etc) or 0 for missing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 if ($f[$i] eq '1') { $f[$i] = 'A'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 elsif ($f[$i] eq '2') { $f[$i] = 'C'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 elsif ($f[$i] eq '3') { $f[$i] = 'G'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 elsif ($f[$i] eq '4') { $f[$i] = 'T'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 if ($f[$i+1] eq '1') { $f[$i+1] = 'A'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 elsif ($f[$i+1] eq '2') { $f[$i+1] = 'C'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 elsif ($f[$i+1] eq '3') { $f[$i+1] = 'G'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 elsif ($f[$i+1] eq '4') { $f[$i+1] = 'T'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 $f[$i] = uc($f[$i]);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 $f[$i+1] = uc($f[$i+1]);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 $allele[$j] .= " $f[$i]$f[$i+1]";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 $j++;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 close FH or die "Couldn't close $ped, $!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 print "ID Chr Pos";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 foreach (@pheno) { if ($_ > 0) { print " ", $_ - 1; }} #go from 1/2 to 0/1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 print "\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 for(my $i =0; $i <= $#snp; $i++) { #foreach snp
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 $allele[$i] =~ /(\w)/;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 my $nt = $1;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 my $j = 0;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 my @t = split(/:/, $snp[$i]);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 if ($t[0] eq 'chr0' or $t[1] < 0) { next; } #skip this SNP
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 if ($t[0] eq 'chrX') { $t[0] = 'chr23'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 elsif ($t[0] eq 'chrY') { $t[0] = 'chr24'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 elsif ($t[0] eq 'chrXY') { $t[0] = 'chr23'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 elsif ($t[0] eq 'chrMT') { $t[0] = 'chr25'; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 print "$t[2] $t[0] $t[1]";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 $allele[$i] =~ s/^\s+//;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 foreach my $p (split(/ +/, $allele[$i])) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 if ($pheno[$j] > 0) { #pheno 0 or -9 skip
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 #change AA BB AB to 2 0 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 if ($p eq "$nt$nt") { print " 2"; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 elsif ($p =~ /$nt/) { print " 1"; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 else { print " 0"; }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 $j++;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 print "\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 exit;