changeset 5:7df018757d26 draft

Added additional fields, max %N sites and average length
author estrain
date Thu, 18 Oct 2018 17:14:47 -0400
parents d47775122e78
children e386e916efa1
files sum_fastqc.pl
diffstat 1 files changed, 41 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/sum_fastqc.pl	Wed Oct 17 16:29:11 2018 -0400
+++ b/sum_fastqc.pl	Thu Oct 18 17:14:47 2018 -0400
@@ -16,7 +16,7 @@
 $qscore=~s/\s+//g;
 my(@qlist)=split(/\,/,$qscore);
 
-print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tMeanQ";
+print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\%\tMaxN\%\tMeanLen\tMeanQ";
 foreach(@qlist) {
   print "\tQ".$_;
 }
@@ -36,6 +36,12 @@
   @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
   chomp(@qlines);
 
+  @nlines=`awk '/#Base\tN\-Count/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+  chomp(@nlines);
+
+  @lenlines=`awk '/#Length\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+  chomp(@lenlines);
+
   @fastqc = split(/[\n\t]/,shift(@sumlines));
   @pass = split(/\t/,shift(@sumlines));
   shift(@sumlines);
@@ -54,6 +60,8 @@
   print $nreads[1]."\t";
   print $npoor[1]."\t";
   print $gc[1]."\t";
+  print maxn(\@nlines)."\t";
+  print meanlen($nreads[1],\@lenlines)."\t";
   print readmean($nreads[1],\@qlines);
   foreach $qs (@qlist) {
     print "\t";
@@ -70,10 +78,10 @@
    $sum = 0;
   
    foreach $item (@qarray) {
-      my($qval,$q)=split(/\t/,$item);
-      if($qval>=$cutoff) {
-        $sum += $q;
-      }
+     my($qval,$q)=split(/\t/,$item);
+     if($qval>=$cutoff) {
+       $sum += $q;
+     }
    }
    $qmean = sprintf("%.2f", 100 * $sum / $nreads);
    return $qmean;
@@ -92,3 +100,31 @@
    $readq = sprintf("%.2f", $sum / $nreads);
    return $readq;
 }
+
+sub maxn {                       # largest N-Count over the "#Base\tN-Count" rows, x100
+   my($nref)=@_;                 # array ref: one "position<TAB>N-Count" string per row
+   my($max_nval)=0;              # stays 0 when no row carries a larger value
+   # NOTE(review): FastQC appears to write N-Count as a percentage already, so
+   # the 100x scaling below may overstate the result -- confirm against real data.
+   foreach my $item (@{$nref}) {
+     my(undef,$nval)=split(/\t/,$item);        # first column (position) is not needed
+     next unless defined($nval);               # row without a count column
+     $max_nval=$nval if($nval>$max_nval);
+   }
+   $max_nval = sprintf("%.2f", 100*$max_nval);
+   return $max_nval;                           # string formatted with two decimals
+}
+
+sub meanlen {                    # mean read length from the "#Length\tCount" rows
+   my($nreads,$lref)=@_;         # total read count; array ref of "length<TAB>count" rows
+   my($sum) = 0;
+   # A length bin is either a range ("30-39") or a single value ("151");
+   # a range contributes its midpoint, a single value contributes itself.
+   foreach my $item (@{$lref}) {
+     my($lenrange,$count)=split(/\t/,$item);
+     my($l1,$l2)=split(/\-/,$lenrange);
+     $l2=$l1 unless defined($l2);              # single value: treat as "N-N"
+     $sum+=(($l1+$l2)/2)*$count;
+   }
+   return sprintf("%.1f",$sum/$nreads);        # one decimal; dies if $nreads is 0
+}