Mercurial > repos > xuebing > sharplabtool
comparison tools/human_genome_variation/lped_to_geno.pl @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 | |
6 #convert from a MAP and PED file to a genotype file | |
7 #assumes not many SNPs but lots of individuals | |
8 # transposed formats are used when lots of SNPs (TPED, TFAM) | |
9 | |
10 if (!@ARGV or scalar @ARGV ne 2) { | |
11 print "usage: lped_to_geno.pl infile.map infile.ped > outfile\n"; | |
12 exit; | |
13 } | |
14 | |
15 my $map = shift @ARGV; | |
16 my $ped = shift @ARGV; | |
17 | |
18 my @snp; #array to hold SNPs from map file | |
19 open(FH, $map) or die "Couldn't open $map, $!\n"; | |
20 while (<FH>) { | |
21 chomp; | |
22 my @f = split(/\s+/); #3 or 4 columns | |
23 #chrom ID [distance|morgans] position | |
24 if (!exists $f[3]) { $f[3] = $f[2]; } #only 3 columns | |
25 #have to leave in so know which to skip later | |
26 #if ($f[3] < 0) { next; } #way of excluding SNPs | |
27 #if ($f[0] eq '0') { next; } #unplaced SNP | |
28 $f[0] = "chr$f[0]"; | |
29 push(@snp, "$f[0]:$f[3]:$f[1]"); | |
30 } | |
31 close FH or die "Couldn't finish $map, $!\n"; | |
32 | |
33 #rows are individuals, columns are SNPs (7 & up) | |
34 #need to print row per SNP | |
35 my @allele; #alleles to go with @snp | |
36 my @pheno; #marker for phenotype | |
37 open(FH, $ped) or die "Couldn't open $ped, $!\n"; | |
38 while (<FH>) { | |
39 chomp; | |
40 my @f = split(/\s+/); | |
41 if (!defined $f[5]) { die "ERROR undefined phenotype $f[0] $f[1] $f[2] $f[3] $f[4]\n"; } | |
42 push(@pheno, $f[5]); | |
43 my $j = 0; | |
44 for(my $i = 6; $i< $#f; $i+=2) { | |
45 if (!$allele[$j]) { $allele[$j] = ''; } | |
46 #can be ACTG or 1234 (for haploview etc) or 0 for missing | |
47 if ($f[$i] eq '1') { $f[$i] = 'A'; } | |
48 elsif ($f[$i] eq '2') { $f[$i] = 'C'; } | |
49 elsif ($f[$i] eq '3') { $f[$i] = 'G'; } | |
50 elsif ($f[$i] eq '4') { $f[$i] = 'T'; } | |
51 if ($f[$i+1] eq '1') { $f[$i+1] = 'A'; } | |
52 elsif ($f[$i+1] eq '2') { $f[$i+1] = 'C'; } | |
53 elsif ($f[$i+1] eq '3') { $f[$i+1] = 'G'; } | |
54 elsif ($f[$i+1] eq '4') { $f[$i+1] = 'T'; } | |
55 $f[$i] = uc($f[$i]); | |
56 $f[$i+1] = uc($f[$i+1]); | |
57 $allele[$j] .= " $f[$i]$f[$i+1]"; | |
58 $j++; | |
59 } | |
60 } | |
61 close FH or die "Couldn't close $ped, $!\n"; | |
62 | |
63 print "ID Chr Pos"; | |
64 foreach (@pheno) { if ($_ > 0) { print " ", $_ - 1; }} #go from 1/2 to 0/1 | |
65 print "\n"; | |
66 for(my $i =0; $i <= $#snp; $i++) { #foreach snp | |
67 $allele[$i] =~ /(\w)/; | |
68 my $nt = $1; | |
69 my $j = 0; | |
70 my @t = split(/:/, $snp[$i]); | |
71 if ($t[0] eq 'chr0' or $t[1] < 0) { next; } #skip this SNP | |
72 if ($t[0] eq 'chrX') { $t[0] = 'chr23'; } | |
73 elsif ($t[0] eq 'chrY') { $t[0] = 'chr24'; } | |
74 elsif ($t[0] eq 'chrXY') { $t[0] = 'chr23'; } | |
75 elsif ($t[0] eq 'chrMT') { $t[0] = 'chr25'; } | |
76 print "$t[2] $t[0] $t[1]"; | |
77 $allele[$i] =~ s/^\s+//; | |
78 foreach my $p (split(/ +/, $allele[$i])) { | |
79 if ($pheno[$j] > 0) { #pheno 0 or -9 skip | |
80 #change AA BB AB to 2 0 1 | |
81 if ($p eq "$nt$nt") { print " 2"; } | |
82 elsif ($p =~ /$nt/) { print " 1"; } | |
83 else { print " 0"; } | |
84 } | |
85 $j++; | |
86 } | |
87 print "\n"; | |
88 } | |
89 | |
90 exit; |