Mercurial > repos > mcharles > rapsosnp
view rapsodyn/CreateMatrixMultiple.pl @ 15:56d328bce3a7 draft default tip
Uploaded
author | mcharles |
---|---|
date | Thu, 29 Jan 2015 08:54:06 -0500 |
parents | 0a6c1cfe4dc8 |
children |
line wrap: on
line source
#!/usr/bin/perl #V1.0.0 use strict; use warnings; use Getopt::Long; my $input_matrix_files; GetOptions ( "input_matrix_files=s" => \$input_matrix_files ) or die("Error in command line arguments\n"); my @files = split(/,/,$input_matrix_files); my @tbl_hash; my %global_hash; my @tbl_genotype_names; my %chromosome_hash; my %result_by_chr; my @tbl_chr_name; for (my $i=0;$i<=$#files;$i++){ my $current_file = $files[$i]; my $current_genotype = "NA"; my %current_hash; open (CF,$current_file) or die ("Can't open file $current_file\n"); my $header = <CF>; if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){ $current_genotype = $1; } else { print STDERR "Unable to recognize header in matrix file\n$header\n"; exit(0); } while (my $line=<CF>){ if ($line!~/^\s*$/){ my @fields = split (/\t+/,$line); my $chr; my $pos; my $ref; my $variant; if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ $chr = $1; } else { print STDERR "Unable to detect chromosome in matrix file\n$line\n"; exit(0); } if ($fields[1]=~/^\s*(\d+)\s*$/){ $pos = $1; } else { print STDERR "Unable to detect position in matrix file\n$line\n"; exit(0); } if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ $ref = $1; } else { print STDERR "Unable to detect reference base in matrix file\n$line\n"; exit(0); } if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){ $variant = $1; } else { print STDERR "Unable to detect variant in matrix file\n$line\n"; exit(0); } $current_hash{"$chr#$pos"} = $variant; $global_hash{"$chr#$pos"} = $ref; } } close CF; push(@tbl_genotype_names,$current_genotype); push(@tbl_hash,\%current_hash); } print "Chrom\tPos\tRef"; for (my $i=0;$i<=$#tbl_genotype_names;$i++){ print "\t".$tbl_genotype_names[$i]; } print "\n"; my @tbl_line_to_display; #exit(0); foreach my $key (keys %global_hash){ my @tbl = split (/\#/,$key); my $chr = $tbl[0]; my $pos = $tbl[1]; my $ref = $global_hash{$key}; my $line = "$chr\t$pos\t$ref"; my $isvariant = 0; for (my $i=0;$i<=$#tbl_hash;$i++){ #my %current_hash = %{$tbl_hash[$i]}; if ($tbl_hash[$i]->{$key}){ $line.="\t".$tbl_hash[$i]->{$key}; if ($tbl_hash[$i]->{$key} ne $ref){ $isvariant = 1; } } else { $line.="\t"."NA"; } } $line .="\n"; if ($isvariant == 1){ push(@tbl_line_to_display,$line); } } for (my $i=0;$i<=$#tbl_line_to_display;$i++){ my @tbl = split (/\s+/,$tbl_line_to_display[$i]); my $current_chr = $tbl[0]; my @current_tbl; if ($result_by_chr{$current_chr}){ push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]); } else { push (@current_tbl,$tbl_line_to_display[$i]); $result_by_chr{$current_chr} = \@current_tbl; } } foreach my $key (sort keys %result_by_chr){ my @current_tbl = sort mysort @{$result_by_chr{$key}}; for (my $i=0;$i<=$#current_tbl;$i++){ print $current_tbl[$i]; } } sub mysort { my @tbla = split (/\s+/,$a); my @tblb = split (/\s+/,$b); $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1]; }