view rapsodyn/MergeLogFiles.pl @ 14:93e6f2af1ce2 draft

Uploaded
author mcharles
date Mon, 26 Jan 2015 18:10:52 -0500
parents 827da1a9a326
children
line wrap: on
line source

#!/usr/bin/perl
#V1.0.0
use strict;
use warnings;
use Getopt::Long;


# Comma-separated list of the log files to merge, given on the command line
# as --input_log_files=file1,file2,...
my $input_log_files;
GetOptions (
"input_log_files=s" => \$input_log_files
) or die("Error in command line arguments\n");

# Split the list into individual file names. An absent option is treated as
# an empty list instead of being passed to split() as undef, and empty entries
# (e.g. from "a,,b" or a leading comma) are dropped so they are never handed
# to open() as empty file names.
my @files = grep { $_ ne "" } split(/,/, defined($input_log_files) ? $input_log_files : "");

# Refuse to run with nothing to merge instead of silently printing an empty report.
if (!@files){
	die("Missing required option --input_log_files (comma-separated list of log files)\n");
}



# State accumulated over all input log files. Each "*_detected" flag is set
# to 1 when the corresponding section header is seen in at least one file.

# Fastq preparation: reported format plus read/base counts summed over files,
# before and after preparation, for read 1 and read 2.
my ($FastqPrep_detected, $FastqPrep_type) = (0, "NA");
my ($FastqPrep_before_read1_nbreads, $FastqPrep_before_read1_nbbases) = (0, 0);
my ($FastqPrep_before_read2_nbreads, $FastqPrep_before_read2_nbbases) = (0, 0);
my ($FastqPrep_after_read1_nbreads, $FastqPrep_after_read1_nbbases) = (0, 0);
my ($FastqPrep_after_read2_nbreads, $FastqPrep_after_read2_nbbases) = (0, 0);

# Sam filtering: per-score read counts (score => summed count) and overall
# read totals, before and after filtering.
my $Samfilter_detected = 0;
my (%Samfilter_before_hash, %Samfilter_after_hash);
my ($Samfilter_before_nbreads, $Samfilter_after_nbreads) = (0, 0);

# Sections whose text is collected as-is from the logs and printed back unchanged.
my ($Pileupvariant_detected, $Pileupvariant) = (0, "");
my ($Listfiltering_detected, $Listfiltering) = (0, "");
my ($Pileupfiltering_detected, $Pileupfiltering) = (0, "");

# Blast filtering: numbers of checked and selected variants summed over files.
my ($Blastfiltering_detected, $Blastfiltering_checked, $Blastfiltering_selected) = (0, 0, 0);



# Return the next $count lines read from the open filehandle $fh, as a list.
# Lines that are missing because the file ends early come back as empty
# strings, so callers can pattern-match or split them without triggering
# "uninitialized value" warnings.
sub _read_section_lines {
	my ($fh, $count) = @_;
	my @section_lines;
	for (1..$count){
		my $next_line = <$fh>;
		push(@section_lines, defined($next_line) ? $next_line : "");
	}
	return @section_lines;
}

# Read lines from $fh up to and including the first blank (whitespace-only)
# line, or up to end of file, and return them concatenated.
sub _read_until_blank_line {
	my ($fh) = @_;
	my $text = "";
	while (my $next_line = <$fh>){
		$text .= $next_line;
		last if ($next_line =~ /^\s*$/);
	}
	return $text;
}

# Add the per-score read counts of one score-line / number-line pair to the
# hash referenced by $counts_of (score => summed count) and return the sum of
# the counts that were added.
# Both lines are split on '*' and ':'; column i of the score line is paired
# with column i of the number line, and columns whose score cell contains no
# digits (such as the row label) are skipped.
# Dies when the two lines do not have the same number of columns, or when a
# score has no numeric count in the matching column.
sub _accumulate_sam_counts {
	my ($score_line, $number_line, $counts_of) = @_;
	my @scores = split(/[\*\:]/,$score_line);
	my @numbers = split(/[\*\:]/,$number_line);

	if ($#scores != $#numbers){
		die("Error Formating in Sam Filtering\n");
	}

	my $total = 0;
	for my $i (0..$#scores){
		next unless ($scores[$i] =~ /(\d+)/);
		my $score = $1;
		if ($numbers[$i] =~ /(\d+)/){
			my $number = $1;
			$counts_of->{$score} += $number;
			$total += $number;
		}
		else {
			die("Error Formating in Sam Filtering\n");
		}
	}
	return $total;
}

# Scan every input log file and accumulate the statistics of each recognised
# section into the file-level report variables. Sections are recognised by
# their header text; the lines that follow a header are consumed according to
# that section's fixed layout. Format errors in a Sam filtering section abort
# the script through die(), i.e. with a non-zero exit status.
for my $current_file (@files){
	# Three-argument open with a lexical handle: the file name is never
	# interpreted as an open mode or a pipe, and $! gives the failure reason.
	open(my $in, "<", $current_file) or die ("Can't open $current_file: $!\n");
	if ( -z $in){
		# Nothing to parse in an empty file; release the handle before skipping it.
		close($in);
		next;
	}

	while (my $line = <$in>){
		if ($line =~ /Fastq preparation/){
			$FastqPrep_detected=1;
			# Seven lines follow the header: the format line, then two groups made
			# of one line that is skipped followed by the read 1 and read 2 lines
			# (first group: before preparation, second group: after preparation).
			my ($format_line, undef, $before_read1, $before_read2, undef, $after_read1, $after_read2) = _read_section_lines($in, 7);
			if ($format_line=~/Fastq format \:\s*(\w+)\s*/){
				$FastqPrep_type=$1;
			}
			# Each read line carries two "label : number" pairs: reads, then bases.
			if ($before_read1=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
				$FastqPrep_before_read1_nbreads += $1;
				$FastqPrep_before_read1_nbbases += $2;
			}
			if ($before_read2=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
				$FastqPrep_before_read2_nbreads += $1;
				$FastqPrep_before_read2_nbbases += $2;
			}
			if ($after_read1=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
				$FastqPrep_after_read1_nbreads += $1;
				$FastqPrep_after_read1_nbbases += $2;
			}
			if ($after_read2=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
				$FastqPrep_after_read2_nbreads += $1;
				$FastqPrep_after_read2_nbbases += $2;
			}
		}
		elsif ($line =~ /Blast filtering/){
			$Blastfiltering_detected=1;
			# Two lines follow the header; the first number found on each is the
			# count of checked, respectively selected, variants.
			my ($checked_line, $selected_line) = _read_section_lines($in, 2);
			if ($checked_line=~/(\d+)/){
				$Blastfiltering_checked += $1;
			}
			if ($selected_line=~/(\d+)/){
				$Blastfiltering_selected += $1;
			}
		}
		elsif ($line=~/Sam filtering/){
			$Samfilter_detected=1;
			# Six lines follow the header: two groups made of one line that is
			# skipped, a score line and a number line (before, then after filtering).
			my (undef, $score_before, $number_before, undef, $score_after, $number_after) = _read_section_lines($in, 6);
			$Samfilter_before_nbreads += _accumulate_sam_counts($score_before, $number_before, \%Samfilter_before_hash);
			$Samfilter_after_nbreads += _accumulate_sam_counts($score_after, $number_after, \%Samfilter_after_hash);
		}
		elsif ($line=~/Variant extraction/){
			# Copied as-is: the header line plus everything up to the next blank line.
			$Pileupvariant_detected=1;
			$Pileupvariant .= $line . _read_until_blank_line($in);
		}
		elsif ($line=~/List Filtering/){
			$Listfiltering_detected =1;
			$Listfiltering .= $line . _read_until_blank_line($in);
		}
		elsif ($line=~/MPileup filtering/){
			$Pileupfiltering_detected =1;
			$Pileupfiltering .= $line . _read_until_blank_line($in);
		}
	}
	close ($in);
}

# Print the merged "Fastq preparation" section (only when at least one input
# log contained it): the format, then the summed read and base counts before
# and after preparation, followed by an empty line.
if ($FastqPrep_detected == 1){
	my @fastq_report = (
		"####\tFastq preparation\n",
		"Fastq format : ".$FastqPrep_type."\n",
		"## Before preparation\n",
		"#Read1 :	".$FastqPrep_before_read1_nbreads."\t#Base : ".$FastqPrep_before_read1_nbbases."\n",
		"#Read2 :	".$FastqPrep_before_read2_nbreads."\t#Base : ".$FastqPrep_before_read2_nbbases."\n",
		"## After preparation\n",
		"#Read1 :	".$FastqPrep_after_read1_nbreads."\t#Base : ".$FastqPrep_after_read1_nbbases."\n",
		"#Read2 :	".$FastqPrep_after_read2_nbreads."\t#Base : ".$FastqPrep_after_read2_nbbases."\n",
		"\n",
	);
	print @fastq_report;
}


# Print the merged "Sam filtering" section (only when at least one input log
# contained it): for the reads before and after filtering, the total number of
# reads and a two-row table giving each score and its summed read count.
if ($Samfilter_detected == 1){
	# Column order, computed once per table so both rows are guaranteed to line
	# up: decreasing read count, ties broken by increasing score so the order
	# does not depend on Perl's hash ordering.
	my @before_scores = sort {
		$Samfilter_before_hash{$b} <=> $Samfilter_before_hash{$a} || $a <=> $b
	} keys %Samfilter_before_hash;
	my @after_scores = sort {
		$Samfilter_after_hash{$b} <=> $Samfilter_after_hash{$a} || $a <=> $b
	} keys %Samfilter_after_hash;

	print "####	 Sam filtering \n";
	print "## Before filtering ($Samfilter_before_nbreads)\n";
	print "bitscore	:", (map { "\t$_\t*" } @before_scores), "\n";
	print " number 	:", (map { "\t$Samfilter_before_hash{$_}\t*" } @before_scores), "\n";
	print "## After filtering ($Samfilter_after_nbreads)\n";
	# Every cell ends with '*', in all four rows. The Sam filtering parser of
	# this script splits rows on '*' and ':' and requires the score row and the
	# number row to have the same number of columns, so a row written without
	# the separator could not be read back.
	print "bitscore	:", (map { "\t$_\t*" } @after_scores), "\n";
	print " number 	:", (map { "\t$Samfilter_after_hash{$_}\t*" } @after_scores), "\n";
	print "\n";
}

# Print the remaining sections, each only when it was detected in at least one
# input log, in this order: Variant extraction, List Filtering, Blast
# filtering, MPileup filtering.

# Text collected from the logs, printed back followed by a newline.
print $Pileupvariant."\n" if ($Pileupvariant_detected == 1);
print $Listfiltering."\n" if ($Listfiltering_detected == 1);

# Checked and selected variant counts summed over all logs, then an empty line.
if ($Blastfiltering_detected == 1){
	my @blast_report = (
		"####	 Blast filtering\n",
		"Variant checked  :\t$Blastfiltering_checked\n",
		"Variant selected :\t$Blastfiltering_selected\n",
		"\n",
	);
	print @blast_report;
}

print $Pileupfiltering."\n" if ($Pileupfiltering_detected == 1);