Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/CreateMatrixMultiple.pl @ 15:56d328bce3a7 draft default tip
Uploaded
author | mcharles |
---|---|
date | Thu, 29 Jan 2015 08:54:06 -0500 |
parents | 0a6c1cfe4dc8 |
children |
comparison
equal
deleted
inserted
replaced
14:93e6f2af1ce2 | 15:56d328bce3a7 |
---|---|
9 GetOptions ( | 9 GetOptions ( |
10 "input_matrix_files=s" => \$input_matrix_files | 10 "input_matrix_files=s" => \$input_matrix_files |
11 ) or die("Error in command line arguments\n"); | 11 ) or die("Error in command line arguments\n"); |
12 | 12 |
13 my @files = split(/,/,$input_matrix_files); | 13 my @files = split(/,/,$input_matrix_files); |
14 my @tbl_hash; | |
15 my %global_hash; | |
16 my @tbl_genotype_names; | |
17 my %chromosome_hash; | |
18 my %result_by_chr; | |
19 my @tbl_chr_name; | |
20 | |
14 for (my $i=0;$i<=$#files;$i++){ | 21 for (my $i=0;$i<=$#files;$i++){ |
15 print $files[$i],"\n"; | 22 my $current_file = $files[$i]; |
23 my $current_genotype = "NA"; | |
24 my %current_hash; | |
25 | |
26 open (CF,$current_file) or die ("Can't open file $current_file\n"); | |
27 my $header = <CF>; | |
28 if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){ | |
29 $current_genotype = $1; | |
30 } | |
31 else { | |
32 print STDERR "Unable to recognize header in matrix file\n$header\n"; | |
33 exit(0); | |
34 } | |
35 while (my $line=<CF>){ | |
36 if ($line!~/^\s*$/){ | |
37 my @fields = split (/\t+/,$line); | |
38 my $chr; | |
39 my $pos; | |
40 my $ref; | |
41 my $variant; | |
42 | |
43 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
44 $chr = $1; | |
45 } | |
46 else { | |
47 print STDERR "Unable to detect chromosome in matrix file\n$line\n"; | |
48 exit(0); | |
49 } | |
50 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
51 $pos = $1; | |
52 } | |
53 else { | |
54 print STDERR "Unable to detect position in matrix file\n$line\n"; | |
55 exit(0); | |
56 } | |
57 | |
58 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ | |
59 $ref = $1; | |
60 } | |
61 else { | |
62 print STDERR "Unable to detect reference base in matrix file\n$line\n"; | |
63 exit(0); | |
64 } | |
65 | |
66 if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){ | |
67 $variant = $1; | |
68 } | |
69 else { | |
70 print STDERR "Unable to detect variant in matrix file\n$line\n"; | |
71 exit(0); | |
72 } | |
73 $current_hash{"$chr#$pos"} = $variant; | |
74 $global_hash{"$chr#$pos"} = $ref; | |
75 } | |
76 } | |
77 close CF; | |
78 push(@tbl_genotype_names,$current_genotype); | |
79 push(@tbl_hash,\%current_hash); | |
16 } | 80 } |
17 | 81 |
82 print "Chrom\tPos\tRef"; | |
83 for (my $i=0;$i<=$#tbl_genotype_names;$i++){ | |
84 print "\t".$tbl_genotype_names[$i]; | |
85 } | |
86 print "\n"; | |
87 | |
88 my @tbl_line_to_display; | |
89 | |
90 #exit(0); | |
91 | |
92 foreach my $key (keys %global_hash){ | |
93 my @tbl = split (/\#/,$key); | |
94 my $chr = $tbl[0]; | |
95 my $pos = $tbl[1]; | |
96 my $ref = $global_hash{$key}; | |
97 my $line = "$chr\t$pos\t$ref"; | |
98 my $isvariant = 0; | |
99 for (my $i=0;$i<=$#tbl_hash;$i++){ | |
100 #my %current_hash = %{$tbl_hash[$i]}; | |
101 if ($tbl_hash[$i]->{$key}){ | |
102 $line.="\t".$tbl_hash[$i]->{$key}; | |
103 if ($tbl_hash[$i]->{$key} ne $ref){ | |
104 $isvariant = 1; | |
105 } | |
106 } | |
107 else { | |
108 $line.="\t"."NA"; | |
109 } | |
110 | |
111 } | |
112 | |
113 $line .="\n"; | |
114 if ($isvariant == 1){ | |
115 push(@tbl_line_to_display,$line); | |
116 } | |
117 } | |
118 | |
119 | |
120 for (my $i=0;$i<=$#tbl_line_to_display;$i++){ | |
121 my @tbl = split (/\s+/,$tbl_line_to_display[$i]); | |
122 my $current_chr = $tbl[0]; | |
123 my @current_tbl; | |
124 if ($result_by_chr{$current_chr}){ | |
125 push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]); | |
126 } | |
127 else { | |
128 push (@current_tbl,$tbl_line_to_display[$i]); | |
129 $result_by_chr{$current_chr} = \@current_tbl; | |
130 } | |
131 } | |
132 | |
133 foreach my $key (sort keys %result_by_chr){ | |
134 my @current_tbl = sort mysort @{$result_by_chr{$key}}; | |
135 for (my $i=0;$i<=$#current_tbl;$i++){ | |
136 print $current_tbl[$i]; | |
137 } | |
138 } | |
139 | |
140 sub mysort { | |
141 my @tbla = split (/\s+/,$a); | |
142 my @tblb = split (/\s+/,$b); | |
143 $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1]; | |
144 } | |
145 | |
146 |