Mercurial > repos > estrain > sum_fastqc
changeset 5:7df018757d26 draft
Added additional fields, max %N sites and average length
author | estrain |
---|---|
date | Thu, 18 Oct 2018 17:14:47 -0400 |
parents | d47775122e78 |
children | e386e916efa1 |
files | sum_fastqc.pl |
diffstat | 1 files changed, 41 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/sum_fastqc.pl Wed Oct 17 16:29:11 2018 -0400 +++ b/sum_fastqc.pl Thu Oct 18 17:14:47 2018 -0400 @@ -16,7 +16,7 @@ $qscore=~s/\s+//g; my(@qlist)=split(/\,/,$qscore); -print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tMeanQ"; +print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\%\tMaxN\%\tMeanLen\tMeanQ"; foreach(@qlist) { print "\tQ".$_; } @@ -36,6 +36,12 @@ @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; chomp(@qlines); + @nlines=`awk '/#Base\tN\-Count/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; + chomp(@nlines); + + @lenlines=`awk '/#Length\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; + chomp(@lenlines); + @fastqc = split(/[\n\t]/,shift(@sumlines)); @pass = split(/\t/,shift(@sumlines)); shift(@sumlines); @@ -54,6 +60,8 @@ print $nreads[1]."\t"; print $npoor[1]."\t"; print $gc[1]."\t"; + print maxn(\@nlines)."\t"; + print meanlen($nreads[1],\@lenlines)."\t"; print readmean($nreads[1],\@qlines); foreach $qs (@qlist) { print "\t"; @@ -70,10 +78,10 @@ $sum = 0; foreach $item (@qarray) { - my($qval,$q)=split(/\t/,$item); - if($qval>=$cutoff) { - $sum += $q; - } + my($qval,$q)=split(/\t/,$item); + if($qval>=$cutoff) { + $sum += $q; + } } $qmean = sprintf("%.2f", 100 * $sum / $nreads); return $qmean; @@ -92,3 +100,31 @@ $readq = sprintf("%.2f", $sum / $nreads); return $readq; } + +sub maxn { + @narray=@{$_[0]}; + my($max_nval)=0; + + foreach $item (@narray) { + my($plist,$nval)=split(/\t/,$item); + if($nval>$max_nval) { + $max_nval=$nval; + } + } + $max_nval = sprintf("%.2f", 100*$max_nval); + return $max_nval; +} + +sub meanlen { + $nreads=shift(@_); + @larray=@{$_[0]}; + my($sum) = 0; + + foreach $item (@larray) { + my($lenrange,$count)=split(/\t/,$item); + my($l1,$l2)=split(/\-/,$lenrange); + $sum+=(($l1+$l2)/2)*$count; + } + $sum = sprintf("%.1f",$sum/$nreads); + return $sum; +}