# HG changeset patch # User estrain # Date 1539789003 14400 # Node ID 8256c1d0d63b3f82dbede7335e72ab13580e4a49 # Parent bc939b04bb12d3028d4687aa4fbf0e452ed7bf0f Uploaded diff -r bc939b04bb12 -r 8256c1d0d63b sum_fastqc.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sum_fastqc.pl Wed Oct 17 11:10:03 2018 -0400 @@ -0,0 +1,79 @@ +#!/usr/bin/perl + +#################################################### +## +## sum_fastqc.pl +## +## Errol Strain (estrain@gmail.com) +## +## Description: Takes raw FASTQC output and produces +## simple table summary +## +#################################################### + +my($inname)=shift(@ARGV); +my($qscore)=shift(@ARGV); +$qscore=~s/\s+//g; +my(@qlist)=split(/\,/,$qscore); + +print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC"; +foreach(@qlist) { + print "\tQ".$_; +} +print "\n"; + +foreach (@ARGV) { + print_stats($_); +} + +sub print_stats { + $infile = shift; + # First 10 lines of raw FASTQC contain basic overview + @sumlines=`head -n 10 $infile`; + chomp(@sumlines); + + # Sequence level Q scores are buried in the middle of the file + @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; + chomp(@qlines); + + @fastqc = split(/[\n\t]/,shift(@sumlines)); + @pass = split(/\t/,shift(@sumlines)); + shift(@sumlines); + @fn = split(/\t/,shift(@sumlines)); + shift(@sumlines); + shift(@sumlines); + @nreads = split(/\t/,shift(@sumlines)); + @npoor = split(/\t/,shift(@sumlines)); + shift(@sumlines); + @gc = split(/\t/,shift(@sumlines)); + + print $inname."\t"; + print $fn[1]."\t"; + print $fastqc[1]."\t"; + print $pass[1]."\t"; + print $nreads[1]."\t"; + print $npoor[1]."\t"; + print $gc[1]; + foreach $qs (@qlist) { + print "\t"; + print qcal($nreads[1],$qs,\@qlines); + } + print "\n"; +} + +# Sum reads w/ Q scores > cutoff and divide by number of reads +sub qcal { + $nreads=shift(@_); + $cutoff=shift(@_); + @qarray=@{$_[0]}; + $sum = 0; + + foreach $item (@qarray) { + my($qval,$q)=split(/\t/,$item); + if($qval>=$cutoff) { + $sum += $q; + } + } + $qmean = sprintf("%.2f", 100 * $sum / $nreads); + return $qmean; +}