annotate CreateMatrixMultiple.pl @ 2:cabe6c55780a draft

Uploaded
author jnavarro
date Thu, 26 Oct 2017 07:37:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
1 #!/usr/bin/perl
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
2 #V1.0.0
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
3 use strict;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
4 use warnings;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
5 use Getopt::Long;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
6
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
7
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
8 my $input_matrix_files;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
9 GetOptions (
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
10 "input_matrix_files=s" => \$input_matrix_files
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
11 ) or die("Error in command line arguments\n");
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
12
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
13 my @files = split(/,/,$input_matrix_files);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
14 my @tbl_hash;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
15 my %global_hash;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
16 my @tbl_genotype_names;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
17 my %chromosome_hash;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
18 my %result_by_chr;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
19 my @tbl_chr_name;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
20
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
21 for (my $i=0;$i<=$#files;$i++){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
22 my $current_file = $files[$i];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
23 my $current_genotype = "NA";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
24 my %current_hash;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
25
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
26 open (CF,$current_file) or die ("Can't open file $current_file\n");
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
27 my $header = <CF>;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
28 if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
29 $current_genotype = $1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
30 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
31 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
32 print STDERR "Unable to recognize header in matrix file\n$header\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
33 exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
34 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
35 while (my $line=<CF>){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
36 if ($line!~/^\s*$/){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
37 my @fields = split (/\t+/,$line);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
38 my $chr;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
39 my $pos;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
40 my $ref;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
41 my $variant;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
42
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
43 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
44 $chr = $1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
45 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
46 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
47 print STDERR "Unable to detect chromosome in matrix file\n$line\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
48 exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
49 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
50 if ($fields[1]=~/^\s*(\d+)\s*$/){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
51 $pos = $1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
52 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
53 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
54 print STDERR "Unable to detect position in matrix file\n$line\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
55 exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
56 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
57
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
58 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
59 $ref = $1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
60 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
61 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
62 print STDERR "Unable to detect reference base in matrix file\n$line\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
63 exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
64 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
65
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
66 if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
67 $variant = $1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
68 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
69 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
70 print STDERR "Unable to detect variant in matrix file\n$line\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
71 exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
72 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
73 $current_hash{"$chr#$pos"} = $variant;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
74 $global_hash{"$chr#$pos"} = $ref;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
75 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
76 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
77 close CF;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
78 push(@tbl_genotype_names,$current_genotype);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
79 push(@tbl_hash,\%current_hash);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
80 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
81
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
82 print "Chrom\tPos\tRef";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
83 for (my $i=0;$i<=$#tbl_genotype_names;$i++){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
84 print "\t".$tbl_genotype_names[$i];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
85 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
86 print "\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
87
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
88 my @tbl_line_to_display;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
89
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
90 #exit(0);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
91
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
92 foreach my $key (keys %global_hash){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
93 my @tbl = split (/\#/,$key);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
94 my $chr = $tbl[0];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
95 my $pos = $tbl[1];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
96 my $ref = $global_hash{$key};
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
97 my $line = "$chr\t$pos\t$ref";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
98 my $isvariant = 0;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
99 for (my $i=0;$i<=$#tbl_hash;$i++){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
100 #my %current_hash = %{$tbl_hash[$i]};
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
101 if ($tbl_hash[$i]->{$key}){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
102 $line.="\t".$tbl_hash[$i]->{$key};
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
103 if ($tbl_hash[$i]->{$key} ne $ref){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
104 $isvariant = 1;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
105 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
106 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
107 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
108 $line.="\t"."NA";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
109 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
110
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
111 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
112
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
113 $line .="\n";
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
114 if ($isvariant == 1){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
115 push(@tbl_line_to_display,$line);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
116 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
117 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
118
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
119
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
120 for (my $i=0;$i<=$#tbl_line_to_display;$i++){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
121 my @tbl = split (/\s+/,$tbl_line_to_display[$i]);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
122 my $current_chr = $tbl[0];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
123 my @current_tbl;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
124 if ($result_by_chr{$current_chr}){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
125 push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
126 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
127 else {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
128 push (@current_tbl,$tbl_line_to_display[$i]);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
129 $result_by_chr{$current_chr} = \@current_tbl;
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
130 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
131 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
132
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
133 foreach my $key (sort keys %result_by_chr){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
134 my @current_tbl = sort mysort @{$result_by_chr{$key}};
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
135 for (my $i=0;$i<=$#current_tbl;$i++){
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
136 print $current_tbl[$i];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
137 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
138 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
139
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
140 sub mysort {
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
141 my @tbla = split (/\s+/,$a);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
142 my @tblb = split (/\s+/,$b);
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
143 $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1];
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
144 }
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
145
cabe6c55780a Uploaded
jnavarro
parents:
diff changeset
146