view sum_fastqc.pl @ 3:8256c1d0d63b draft

Uploaded
author estrain
date Wed, 17 Oct 2018 11:10:03 -0400
parents
children d47775122e78
line wrap: on
line source

#!/usr/bin/perl

####################################################
## 
## sum_fastqc.pl
## 
## Errol Strain (estrain@gmail.com) 
##
## Description: Takes raw FASTQC output and produces
## simple table summary
##
#################################################### 

my($inname)=shift(@ARGV);
my($qscore)=shift(@ARGV);
$qscore=~s/\s+//g;
my(@qlist)=split(/\,/,$qscore);

print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC";
foreach(@qlist) {
  print "\tQ".$_;
}
print "\n";

foreach (@ARGV) {
  print_stats($_);
}

sub print_stats {
  $infile = shift;
  # First 10 lines of raw FASTQC contain basic overview
  @sumlines=`head -n 10 $infile`;
  chomp(@sumlines);

  # Sequence level Q scores are buried in the middle of the file
  @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
  chomp(@qlines);

  @fastqc = split(/[\n\t]/,shift(@sumlines));
  @pass = split(/\t/,shift(@sumlines));
  shift(@sumlines);
  @fn = split(/\t/,shift(@sumlines));
  shift(@sumlines);
  shift(@sumlines);
  @nreads = split(/\t/,shift(@sumlines));
  @npoor = split(/\t/,shift(@sumlines));
  shift(@sumlines);
  @gc = split(/\t/,shift(@sumlines));

  print $inname."\t";
  print $fn[1]."\t";
  print $fastqc[1]."\t";
  print $pass[1]."\t";
  print $nreads[1]."\t";
  print $npoor[1]."\t";
  print $gc[1];
  foreach $qs (@qlist) {
    print "\t";
    print qcal($nreads[1],$qs,\@qlines);
  }
  print "\n";
}

# Sum reads w/ Q scores > cutoff and divide by number of reads
sub qcal {
   $nreads=shift(@_);
   $cutoff=shift(@_);
   @qarray=@{$_[0]};
   $sum = 0;
  
   foreach $item (@qarray) {
      my($qval,$q)=split(/\t/,$item);
      if($qval>=$cutoff) {
        $sum += $q;
      }
   }
   $qmean = sprintf("%.2f", 100 * $sum / $nreads);
   return $qmean;
}