Mercurial > repos > estrain > sum_fastqc
view sum_fastqc.pl @ 3:8256c1d0d63b draft
Uploaded
author | estrain |
---|---|
date | Wed, 17 Oct 2018 11:10:03 -0400 |
parents | |
children | d47775122e78 |
line wrap: on
line source
#!/usr/bin/perl
####################################################
##
## sum_fastqc.pl
##
## Errol Strain (estrain@gmail.com)
##
## Description: Takes raw FASTQC output and produces
## simple table summary
##
## Usage: sum_fastqc.pl <label> <q_cutoffs> <fastqc_data> [<fastqc_data> ...]
##
##   <label>        text echoed in the "Input" column of every row
##   <q_cutoffs>    comma-separated quality cutoffs (e.g. "20,30"); one
##                  "Q<cutoff>" column is emitted per entry
##   <fastqc_data>  raw FastQC text report(s), one output row per file
##
####################################################

use strict;
use warnings;

my $inname = shift(@ARGV);
my $qscore = shift(@ARGV);

# Missing arguments degrade to empty values so the header is still printed.
$inname = '' unless defined $inname;
$qscore = '' unless defined $qscore;

$qscore =~ s/\s+//g;
my @qlist = split(/,/, $qscore);

print join("\t",
    'Input', 'File', 'FastQC', 'Pass-Fail', 'Reads', 'Poor_Reads', 'GC',
    map { 'Q' . $_ } @qlist), "\n";

foreach my $report_path (@ARGV) {
    print_stats($report_path);
}

# Read one raw FastQC report and return a hash reference with the keys
# version, status, filename, nreads, npoor, gc (strings, '' when absent)
# and qlines (array reference of "quality<TAB>count" rows).
#
# The file is read directly instead of shelling out to head/awk/tail, so
# paths containing spaces or shell metacharacters are handled safely.
# Basic Statistics values are looked up by row label rather than by line
# number, so additional rows (e.g. a "Total Bases" row) cannot shift the
# columns.
#
# Dies if the report cannot be opened.
sub read_fastqc_report {
    my ($infile) = @_;

    open(my $fh, '<', $infile)
        or die "sum_fastqc.pl: cannot open '$infile': $!\n";

    my %report = (
        version  => '',
        status   => '',
        filename => '',
        nreads   => '',
        npoor    => '',
        gc       => '',
        qlines   => [],
    );

    # Basic Statistics row label -> key in %report.
    my %field_for = (
        'Filename'                          => 'filename',
        'Total Sequences'                   => 'nreads',
        'Sequences flagged as poor quality' => 'npoor',
        '%GC'                               => 'gc',
    );

    my $in_basic = 0;    # true while inside the Basic Statistics module
    my $qtable   = 0;    # 0 = not seen yet, 1 = inside, 2 = finished

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        my ($key, $value) = split(/\t/, $line);
        $value = '' unless defined $value;

        if ($line =~ /^##FastQC\t/) {
            $report{version} = $value;
        }
        elsif ($line =~ /^>>END_MODULE/) {
            $in_basic = 0;
            $qtable = 2 if $qtable == 1;
        }
        elsif ($line =~ /^>>Basic Statistics(?:\t|\z)/) {
            $in_basic = 1;
            $report{status} = $value;
        }
        elsif ($qtable == 0 && $line =~ /^#Quality\tCount/) {
            # Header of the per-sequence quality table; data rows follow
            # until the next >>END_MODULE line.
            $qtable = 1;
        }
        elsif ($qtable == 1) {
            push @{ $report{qlines} }, $line;
        }
        elsif ($in_basic && defined $key && exists $field_for{$key}) {
            $report{ $field_for{$key} } = $value;
        }
    }
    close($fh);

    return \%report;
}

# Print one tab-separated summary row for the FastQC report $infile:
# input label, file name, FastQC version, status from the
# ">>Basic Statistics" line, total reads, reads flagged as poor quality,
# %GC, then one percentage per cutoff in @qlist.
sub print_stats {
    my ($infile) = @_;

    my $report = read_fastqc_report($infile);

    my @row = (
        $inname,
        @{$report}{qw(filename version status nreads npoor gc)},
    );
    foreach my $qs (@qlist) {
        push @row, qcal($report->{nreads}, $qs, $report->{qlines});
    }

    print join("\t", @row), "\n";
    return;
}

# Sum reads w/ Q scores >= cutoff and divide by number of reads.
#
#   $nreads  total number of reads in the report
#   $cutoff  quality threshold (inclusive)
#   $qrows   array reference of "quality<TAB>count" rows
#
# Returns the percentage formatted with two decimals, or 'NA' when the
# read count is zero or missing (the percentage is undefined and the
# division would otherwise be fatal).
sub qcal {
    my ($nreads, $cutoff, $qrows) = @_;

    # Values come straight from a text report; keep Perl's lenient
    # string-to-number conversion without emitting warnings on stderr.
    no warnings 'numeric';

    return 'NA' if !defined $nreads || $nreads == 0;
    $cutoff = 0 unless defined $cutoff;

    my $sum = 0;
    foreach my $row (@{ $qrows || [] }) {
        next unless defined $row;
        my ($qval, $count) = split(/\t/, $row);
        next unless defined $qval && defined $count;
        $sum += $count if $qval >= $cutoff;
    }

    return sprintf("%.2f", 100 * $sum / $nreads);
}