# HG changeset patch
# User estrain
# Date 1758208459 0
# Node ID 8c2ad48a2eba15d4ea12487ebdc352eaaa8298b6
# Parent b3d943bc70aeaf699cfc1020a5f09eaaff0f6b90
Uploaded
diff -r b3d943bc70ae -r 8c2ad48a2eba sum_fastqc.pl
--- a/sum_fastqc.pl Wed Feb 23 14:35:40 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-#!/usr/bin/perl
-
-####################################################
-##
-## sum_fastqc.pl
-##
-## Errol Strain (estrain@gmail.com)
-##
-## Description: Takes raw FASTQC output and produces
-## simple table summary
-##
-####################################################
-
-my($inname)=shift(@ARGV);
-my($qscore)=shift(@ARGV);
-$qscore=~s/\s+//g;
-my(@qlist)=split(/\,/,$qscore);
-
-print "Input\tFile\tFastQC\tReads\tGC\%\tAvg_Len\tMax\_N\%\tMean_Q";
-foreach(@qlist) {
- print "\tQ".$_."\%";
-}
-print "\n";
-
-foreach (@ARGV) {
- print_stats($_);
-}
-
-sub print_stats {
- $infile = shift;
- # First 10 lines of raw FASTQC contain basic overview
- @sumlines=`head -n 10 $infile`;
- chomp(@sumlines);
-
- # Sequence level Q scores are buried in the middle of the file
- @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
- chomp(@qlines);
-
- @nlines=`awk '/#Base\tN\-Count/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
- chomp(@nlines);
-
- @lenlines=`awk '/#Length\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
- chomp(@lenlines);
-
- @fastqc = split(/[\n\t]/,shift(@sumlines));
- @pass = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- @fn = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- shift(@sumlines);
- @nreads = split(/\t/,shift(@sumlines));
- @npoor = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- @gc = split(/\t/,shift(@sumlines));
-
- print $inname."\t";
- print $fn[1]."\t";
- print $fastqc[1]."\t";
- print $nreads[1]."\t";
- print $gc[1]."\t";
- print meanlen($nreads[1],\@lenlines)."\t";
- print maxn(\@nlines)."\t";
- print readmean($nreads[1],\@qlines);
- foreach $qs (@qlist) {
- print "\t";
- print qcal($nreads[1],$qs,\@qlines);
- }
- print "\n";
-}
-
-# Sum reads w/ Q scores > cutoff and divide by number of reads
-sub qcal {
- $nreads=shift(@_);
- $cutoff=shift(@_);
- @qarray=@{$_[0]};
- $sum = 0;
-
- foreach $item (@qarray) {
- my($qval,$q)=split(/\t/,$item);
- if($qval>=$cutoff) {
- $sum += $q;
- }
- }
- $qmean = sprintf("%.2f", 100 * $sum / $nreads);
- return $qmean;
-}
-
-# Calculate mean read Q score
-sub readmean {
- $nreads=shift(@_);
- @qarray=@{$_[0]};
- my($sum) = 0;
-
- foreach $item (@qarray) {
- my($qval,$q)=split(/\t/,$item);
- $sum += $q*$qval;
- }
-
- $readq = sprintf("%.2f", $sum / $nreads);
- return $readq;
-}
-
-# Find position with hights fraction of Ns
-sub maxn {
- @narray=@{$_[0]};
- my($max_nval)=0;
-
- foreach $item (@narray) {
- my($plist,$nval)=split(/\t/,$item);
- if($nval>$max_nval) {
- $max_nval=$nval;
- }
- }
- $max_nval = sprintf("%.4f", $max_nval);
- return $max_nval;
-}
-
-# Calculate mean read length
-sub meanlen {
- $nreads=shift(@_);
- @larray=@{$_[0]};
- my($sum) = 0;
-
- foreach $item (@larray) {
- my($lenrange,$count)=split(/\t/,$item);
- my(@lvals)=split(/\-/,$lenrange);
- if(@lvals==2) {
- $sum+=(($lvals[0]+$lvals[1])/2)*$count;
- } else {
- $sum+=($lvals[0])*$count;
- }
- }
- $sum = sprintf("%.1f",$sum/$nreads);
- return $sum;
-}
diff -r b3d943bc70ae -r 8c2ad48a2eba sum_fastqc.xml
--- a/sum_fastqc.xml Wed Feb 23 14:35:40 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-
- summarizes raw FASTQC output
-
-
- sum_fastqc.tab
- #else if $jobtype.select == "col"
- #set inname = $jobtype.coll.name
- #set infile = $jobtype.coll.forward
- #set infile2 = $jobtype.coll.reverse
- perl $__tool_directory__/sum_fastqc.pl "$inname" $qset.qscore $infile $infile2 > sum_fastqc.tab;
- #end if
-
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r b3d943bc70ae -r 8c2ad48a2eba sum_fastqc/sum_fastqc.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sum_fastqc/sum_fastqc.pl Thu Sep 18 15:14:19 2025 +0000
@@ -0,0 +1,137 @@
+#!/usr/bin/perl
+
+####################################################
+##
+## sum_fastqc.pl
+##
+## Errol Strain (estrain@gmail.com)
+##
+## Description: Takes raw FASTQC output and produces
+## simple table summary
+##
+####################################################
+
+my($inname)=shift(@ARGV);
+my($qscore)=shift(@ARGV);
+$qscore=~s/\s+//g;
+my(@qlist)=split(/\,/,$qscore);
+
+print "Input\tFile\tFastQC\tReads\tBases\tGC\%\tAvg_Len\tMax\_N\%\tMean_Q";
+foreach(@qlist) {
+ print "\tQ".$_."\%";
+}
+print "\n";
+
+foreach (@ARGV) {
+ print_stats($_);
+}
+
+sub print_stats {
+ $infile = shift;
+ # First 10 lines of raw FASTQC contain basic overview
+ @sumlines=`head -n 10 $infile`;
+ chomp(@sumlines);
+
+ # Sequence level Q scores are buried in the middle of the file
+ @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+ chomp(@qlines);
+
+ @nlines=`awk '/#Base\tN\-Count/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+ chomp(@nlines);
+
+ @lenlines=`awk '/#Length\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
+ chomp(@lenlines);
+
+ @fastqc = split(/[\n\t]/,shift(@sumlines));
+ @pass = split(/\t/,shift(@sumlines));
+ shift(@sumlines);
+ @fn = split(/\t/,shift(@sumlines));
+ shift(@sumlines);
+ shift(@sumlines);
+ @nreads = split(/\t/,shift(@sumlines));
+ @tb = split(/\t/,shift(@sumlines));
+ @npoor = split(/\t/,shift(@sumlines));
+ shift(@sumlines);
+ @gc = split(/\t/,shift(@sumlines));
+
+ print $inname."\t";
+ print $fn[1]."\t";
+ print $fastqc[1]."\t";
+ print $nreads[1]."\t";
+ print $tb[1]."\t";
+ print $gc[1]."\t";
+ print meanlen($nreads[1],\@lenlines)."\t";
+ print maxn(\@nlines)."\t";
+ print readmean($nreads[1],\@qlines);
+ foreach $qs (@qlist) {
+ print "\t";
+ print qcal($nreads[1],$qs,\@qlines);
+ }
+ print "\n";
+}
+
+# Sum reads w/ Q scores > cutoff and divide by number of reads
+sub qcal {
+ $nreads=shift(@_);
+ $cutoff=shift(@_);
+ @qarray=@{$_[0]};
+ $sum = 0;
+
+ foreach $item (@qarray) {
+ my($qval,$q)=split(/\t/,$item);
+ if($qval>=$cutoff) {
+ $sum += $q;
+ }
+ }
+ $qmean = sprintf("%.2f", 100 * $sum / $nreads);
+ return $qmean;
+}
+
+# Calculate mean read Q score
+sub readmean {
+ $nreads=shift(@_);
+ @qarray=@{$_[0]};
+ my($sum) = 0;
+
+ foreach $item (@qarray) {
+ my($qval,$q)=split(/\t/,$item);
+ $sum += $q*$qval;
+ }
+
+ $readq = sprintf("%.2f", $sum / $nreads);
+ return $readq;
+}
+
+# Find position with hights fraction of Ns
+sub maxn {
+ @narray=@{$_[0]};
+ my($max_nval)=0;
+
+ foreach $item (@narray) {
+ my($plist,$nval)=split(/\t/,$item);
+ if($nval>$max_nval) {
+ $max_nval=$nval;
+ }
+ }
+ $max_nval = sprintf("%.4f", $max_nval);
+ return $max_nval;
+}
+
+# Calculate mean read length
+sub meanlen {
+ $nreads=shift(@_);
+ @larray=@{$_[0]};
+ my($sum) = 0;
+
+ foreach $item (@larray) {
+ my($lenrange,$count)=split(/\t/,$item);
+ my(@lvals)=split(/\-/,$lenrange);
+ if(@lvals==2) {
+ $sum+=(($lvals[0]+$lvals[1])/2)*$count;
+ } else {
+ $sum+=($lvals[0])*$count;
+ }
+ }
+ $sum = sprintf("%.1f",$sum/$nreads);
+ return $sum;
+}
diff -r b3d943bc70ae -r 8c2ad48a2eba sum_fastqc/sum_fastqc.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sum_fastqc/sum_fastqc.xml Thu Sep 18 15:14:19 2025 +0000
@@ -0,0 +1,56 @@
+
+ summarizes raw FASTQC output
+
+
+ sum_fastqc.tab
+ #else if $jobtype.select == "col"
+ #set inname = $jobtype.coll.name
+ #set infile = $jobtype.coll.forward
+ #set infile2 = $jobtype.coll.reverse
+ perl $__tool_directory__/sum_fastqc.pl "$inname" $qset.qscore $infile $infile2 > sum_fastqc.tab;
+ #end if
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+