view rapsodyn/CreateMatrix.pl @ 14:93e6f2af1ce2 draft

Uploaded
author mcharles
date Mon, 26 Jan 2015 18:10:52 -0500
parents 0a6c1cfe4dc8
children
line wrap: on
line source

#!/usr/bin/perl
#V1.0.0
use strict;
use warnings;
use Getopt::Long;

my $input_pileup_file;
my $input_variant_unique_file;
my $input_name;

GetOptions (
"input_name=s" => \$input_name,
"input_pileup_file=s" => \$input_pileup_file,
"input_variant_unique_file=s" => \$input_variant_unique_file
) or die("Error in command line arguments\n");


print "Chrom\tPos\tRef\t$input_name\n";

my %variant_unique;
open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n");
while (my $line=<VU>){
	if ($line!~/^\s*$/){
		my @fields = split (/\t+/,$line);
		my $chr;
		my $pos;
		if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
			$chr = $1;
		}
		else {
			print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
			exit(0);
		}
		if ($fields[1]=~/^\s*(\d+)\s*$/){
			$pos = $1;
		}
		else {
			print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
			exit(0);
		}
		if ($#fields>=4){
			$variant_unique{"$chr#$pos"}=$fields[4];
		}
		else {
			print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";	
		}
	}
}
close VU;

my %covered;

my $compt=0;
open (PU,$input_pileup_file) or die ("Can't open pileup file\n");
while (my $line=<PU>){
	#$compt++;
	#if ($compt>200000){last;}
	if ($line!~/^\s*$/){
		my @fields = split (/\t+/,$line);
		my $chr;
		my $pos;
		my $ref;
		my $depth;
		my $pile;
		
		if ($#fields>=4){
			if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
				$chr = $1;
			}
			else {
				print STDERR "Unable to detect chromosome in pileup file\n$line\n";
				exit(0);
			}
			if ($fields[1]=~/^\s*(\d+)\s*$/){
				$pos = $1;
			}
			else {
				print STDERR "Unable to detect position in pileup file\n$line\n";
				exit(0);
			}
			
			if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
				$ref = $1;
			}
			else {
				print STDERR "Unable to detect reference base in pileup file\n$line\n";
				exit(0);
			}
			
			if ($fields[3]=~/^\s*(\d+)\s*$/i){
				$depth = $1;
			}
			else {
				print STDERR "Unable to detect depth in pileup file\n$line\n";
				exit(0);
			}
			
			$pile = $fields[4];
			
			
			if ($variant_unique{"$chr#$pos"}) {
				$pile = $variant_unique{"$chr#$pos"};
				$pile =~ s/\$//g; #the read start at this position
				$pile =~ s/\^.//g; #the read end at this position followed by quality char
				my $ori_pile = $pile;
				
				my @detail = split(//,$pile);
				my $current_in="";
				my $current_del="";
				my $current_length=0;
				my $noindel_pile="";
				my @IN;
				my @DEL;
				
				$noindel_pile = $pile;
				while ($noindel_pile =~ /\+(\d+)/){
					my $length = $1;
					$noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ;
					push(@IN,$1);
					#print "test : $pile / $noindel_pile\n";
				}
				while ($noindel_pile =~ /\-(\d+)/){
					my $length = $1;
					$noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ;
					push(@DEL,$1);
					#print "test : $pile / $noindel_pile\n";
				}
				
				my %variant_type;
				
				my $indel_pile = $ori_pile;
				$variant_type{"IN"} = ($indel_pile =~ s/\+//gi);
				$variant_type{"DEL"} = ($indel_pile =~ s/\-//gi);
				
				my $base_pile = $noindel_pile;
				$variant_type{"A"} = ($base_pile =~ s/a//gi);
				$variant_type{"T"} = ($base_pile =~ s/t//gi);
				$variant_type{"C"} = ($base_pile =~ s/c//gi);
				$variant_type{"G"} = ($base_pile =~ s/g//gi);
				$variant_type{"N"} = ($base_pile =~ s/n//gi);
				
				my $top_number=0;
				my $top_variant="";
				foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){
					if ($variant_type{$key} > 0){
						if ($variant_type{$key} > $top_number){
							$top_number = $variant_type{$key};
							$top_variant = $key;
						}
						elsif ($variant_type{$key} == $top_number){
							$top_variant .= "/$key";
						}
					}
					
				}
				print "$chr\t$pos\t$ref\t$top_variant\n";
				
			}
			else {
				print "$chr\t$pos\t$ref\t$ref\n";
			}
		}
		else {
			print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
		}
	}
}
close PU;