# HG changeset patch
# User mcharles
# Date 1421681909 18000
# Node ID 827da1a9a32626679f945dbb54ef3418242b408c
# Parent  c794dafd1ae5d0dfe74b1122c22ba45f4ba883a5
Uploaded

diff -r c794dafd1ae5 -r 827da1a9a326 rapsodyn/MergeLogFiles.pl
--- a/rapsodyn/MergeLogFiles.pl	Mon Jan 19 04:46:31 2015 -0500
+++ b/rapsodyn/MergeLogFiles.pl	Mon Jan 19 10:38:29 2015 -0500
@@ -11,6 +11,257 @@
 ) or die("Error in command line arguments\n");
 
 my @files = split(/,/,$input_log_files);
-for (my $i=0;$i<=$#files;$i++){
-	print $files[$i],"\n";
+
+
+
+my $FastqPrep_detected=0;
+my $FastqPrep_type="NA";
+my $FastqPrep_before_read1_nbreads=0;
+my $FastqPrep_before_read1_nbbases=0;
+my $FastqPrep_before_read2_nbreads=0;
+my $FastqPrep_before_read2_nbbases=0;
+my $FastqPrep_after_read1_nbreads=0;
+my $FastqPrep_after_read1_nbbases=0;
+my $FastqPrep_after_read2_nbreads=0;
+my $FastqPrep_after_read2_nbbases=0;
+
+my $Samfilter_detected = 0;
+my %Samfilter_before_hash;
+my %Samfilter_after_hash;
+my $Samfilter_before_nbreads=0;
+my $Samfilter_after_nbreads=0;
+
+my $Pileupvariant_detected = 0;
+my $Pileupvariant="";
+
+my $Listfiltering_detected = 0;
+my $Listfiltering="";
+
+my $Pileupfiltering_detected = 0;
+my $Pileupfiltering="";
+
+my $Blastfiltering_detected = 0;
+my $Blastfiltering_checked = 0;
+my $Blastfiltering_selected = 0;
+
+
+
+for (my $f=0;$f<=$#files;$f++){
+	my $current_file = $files[$f];
+	open(IN, $current_file) or die ("Can't open $current_file\n");
+	if ( -z IN){
+		next;
+	}
+
+	while (my $line = <IN>){
+		if ($line =~ /Fastq preparation/){
+			$FastqPrep_detected=1;
+			my $line1 = <IN>;
+			my $line2 = <IN>;
+			my $line3 = <IN>;
+			my $line4 = <IN>;
+			my $line5 = <IN>;
+			my $line6 = <IN>;
+			my $line7 = <IN>;
+			if ($line1=~/Fastq format \:\s*(\w+)\s*/){
+				$FastqPrep_type=$1;
+			}
+			if ($line3=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_before_read1_nbreads += $1;
+				$FastqPrep_before_read1_nbbases += $2;
+			}
+			if ($line4=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_before_read2_nbreads += $1;
+				$FastqPrep_before_read2_nbbases += $2;
+			}
+			if ($line6=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_after_read1_nbreads += $1;
+				$FastqPrep_after_read1_nbbases += $2;
+			}
+			if ($line7=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_after_read2_nbreads += $1;
+				$FastqPrep_after_read2_nbbases += $2;
+			}
+
+		}
+		elsif ($line =~ /Blast filtering/){
+			$Blastfiltering_detected=1;
+			my $line1 = <IN>;
+			my $line2 = <IN>;
+			my $current_checked = 0;
+			my $current_selected = 0;
+			if ($line1=~/(\d+)/){
+				$current_checked = $1;
+				$Blastfiltering_checked += $current_checked;
+			}
+			if ($line2=~/(\d+)/){
+				$current_selected = $1;
+				$Blastfiltering_selected += $current_selected;
+			}
+
+		}
+		elsif ($line=~/Sam filtering/){
+			$Samfilter_detected=1;
+			my $line1 = <IN>;
+			my $line2 = <IN>;
+			my $line3 = <IN>;
+			my $line4 = <IN>;
+			my $line5 = <IN>;
+			my $line6 = <IN>;
+			my @tbl_score_before = split(/[\*\:]/,$line2);
+			my @tbl_number_before = split(/[\*\:]/,$line3);
+
+			my @tbl_score_after = split(/[\*\:]/,$line5);
+			my @tbl_number_after = split(/[\*\:]/,$line6);
+
+			if ($#tbl_score_before != $#tbl_number_before){
+				print STDERR "Error Formating in Sam Filtering\n";
+				exit(0);
+			}
+			else {
+				for (my $i=0;$i<=$#tbl_score_before;$i++){
+					if ($tbl_score_before[$i] =~ /(\d+)/){
+						my $current_score_before = $1;
+						if ($tbl_number_before[$i] =~ /(\d+)/){
+							my $current_number_before = $1;
+							$Samfilter_before_nbreads += $current_number_before;
+							if ($Samfilter_before_hash{$current_score_before}){
+								$Samfilter_before_hash{$current_score_before} += $current_number_before;
+							}
+							else {
+								$Samfilter_before_hash{$current_score_before} = $current_number_before;
+							}
+						}
+						else {
+							print STDERR "Error Formating in Sam Filtering\n";
+							exit(0);
+						}
+					}
+					else {
+						next;
+					}
+				}
+			}
+
+			if ($#tbl_score_after != $#tbl_number_after){
+				print STDERR "Error Formating in Sam Filtering\n";
+				exit(0);
+			}
+			else {
+				for (my $i=0;$i<=$#tbl_score_after;$i++){
+					if ($tbl_score_after[$i] =~ /(\d+)/){
+						my $current_score_after = $1;
+						if ($tbl_number_after[$i] =~ /(\d+)/){
+							my $current_number_after = $1;
+							$Samfilter_after_nbreads += $current_number_after;
+							if ($Samfilter_after_hash{$current_score_after}){
+								$Samfilter_after_hash{$current_score_after} += $current_number_after;
+							}
+							else {
+								$Samfilter_after_hash{$current_score_after} = $current_number_after;
+							}
+						}
+						else {
+							print STDERR "Error Formating in Sam Filtering\n";
+							exit(0);
+						}
+					}
+					else {
+						next;
+					}
+				}
+			}
+		}
+		elsif ($line=~/Variant extraction/){
+			$Pileupvariant_detected=1;
+			$Pileupvariant .= $line;
+			while ($line = <IN>){
+				$Pileupvariant .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+		elsif ($line=~/List Filtering/){
+			$Listfiltering_detected =1;
+			$Listfiltering .= $line;
+			while ($line = <IN>){
+				$Listfiltering .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+		elsif ($line=~/MPileup filtering/){
+			$Pileupfiltering_detected =1;
+			$Pileupfiltering.= $line;
+			while ($line = <IN>){
+				$Pileupfiltering .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+
+	}
+	close (IN);
+}
+
+if ($FastqPrep_detected == 1){
+	print "####\tFastq preparation\n";
+	print "Fastq format : ",$FastqPrep_type,"\n";
+	print "## Before preparation\n";
+	print "#Read1 : ",$FastqPrep_before_read1_nbreads,"\t#Base : ",$FastqPrep_before_read1_nbbases,"\n";
+	print "#Read2 : ",$FastqPrep_before_read2_nbreads,"\t#Base : ",$FastqPrep_before_read2_nbbases,"\n";
+	print "## After preparation\n";
+	print "#Read1 : ",$FastqPrep_after_read1_nbreads,"\t#Base : ",$FastqPrep_after_read1_nbbases,"\n";
+	print "#Read2 : ",$FastqPrep_after_read2_nbreads,"\t#Base : ",$FastqPrep_after_read2_nbbases,"\n";
+	print "\n";
+}
+
+
+if ($Samfilter_detected == 1){
+	print "#### Sam filtering \n";
+	print "## Before filtering ($Samfilter_before_nbreads)\n";
+	print "bitscore :";
+	foreach my $key (sort{$Samfilter_before_hash{$b}<=>$Samfilter_before_hash{$a}} keys %Samfilter_before_hash){
+		print "\t$key\t*";
+	}
+	print "\n";
+	print " number :";
+	foreach my $key (sort{$Samfilter_before_hash{$b}<=>$Samfilter_before_hash{$a}} keys %Samfilter_before_hash){
+		print "\t",$Samfilter_before_hash{$key},"\t*";
+	}
+	print "\n";
+	print "## After filtering ($Samfilter_after_nbreads)\n";
+	print "bitscore :";
+	foreach my $key (sort{$Samfilter_after_hash{$b}<=>$Samfilter_after_hash{$a}} keys %Samfilter_after_hash){
+		print "\t$key\t";
+	}
+	print "\n";
+	print " number :";
+	foreach my $key (sort{$Samfilter_after_hash{$b}<=>$Samfilter_after_hash{$a}} keys %Samfilter_after_hash){
+		print "\t",$Samfilter_after_hash{$key},"\t*";
+	}
+	print "\n";
+	print "\n";
+}
+
+if ($Pileupvariant_detected == 1){
+	print $Pileupvariant,"\n";
+}
+
+if ($Listfiltering_detected == 1){
+	print $Listfiltering,"\n";
+}
+
+if ($Blastfiltering_detected == 1){
+	print "#### Blast filtering\n";
+	print "Variant checked :\t$Blastfiltering_checked\n";
+	print "Variant selected :\t$Blastfiltering_selected\n";
+	print "\n";
+}
+
+if ($Pileupfiltering_detected == 1){
+	print $Pileupfiltering,"\n";
 }
\ No newline at end of file