changeset 0:17652c6a7517 draft

Uploaded
author estrain
date Wed, 17 Oct 2018 11:09:06 -0400
parents
children 7d62c324d642
files sum_fastqc-b769c810924e/sum_fastqc.pl sum_fastqc-b769c810924e/sum_fastqc.xml
diffstat 2 files changed, 134 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sum_fastqc-b769c810924e/sum_fastqc.pl	Wed Oct 17 11:09:06 2018 -0400
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+####################################################
+## 
+## sum_fastqc.pl
+## 
+## Errol Strain (estrain@gmail.com) 
+##
+## Description: Takes raw FASTQC output and produces
+## simple table summary
+##
+#################################################### 
+
+# Usage: sum_fastqc.pl <input name> <Q cutoff[,Q cutoff...]> <fastqc_data.txt>...
+my ($inname, $qscore) = splice(@ARGV, 0, 2);
+die "Usage: $0 <name> <qscore[,qscore...]> <fastqc_data.txt>...\n" unless defined $qscore;
+$qscore =~ s/\s+//g;
+my @qlist = grep { length } split(/,/, $qscore);
+# A non-numeric cutoff means the arguments are shifted (e.g. an empty Q score
+# list let a file path land here); fail instead of printing bogus columns.
+foreach my $q (@qlist) { die "Invalid Q score '$q'\n" unless $q =~ /^\d+$/; }
+
+# Header row: fixed columns followed by one "Q<cutoff>" column per cutoff.
+print join("\t", qw(Input File FastQC Pass-Fail Reads Poor_Reads GC), map("Q$_", @qlist)), "\n";
+
+# One output row per FastQC data file named on the command line.
+print_stats($_) foreach @ARGV;
+
+# print_stats($infile): print one tab-separated summary row for a raw FastQC
+# data file: input name, file name, FastQC version, Basic Statistics status,
+# read count, poor-quality read count, %GC, then one qcal() value per cutoff.
+# Uses the file-level $inname and @qlist; dies if $infile cannot be opened.
+sub print_stats {
+  my ($infile) = @_;
+  my (%stat, @qlines);
+  my $module = '';
+
+  # Parse in Perl instead of shelling out to head/awk/tail: paths are never
+  # interpolated into a shell command, GNU-only "head -n -1" is not needed, and
+  # fields are looked up by label rather than line position (newer FastQC
+  # releases add a "Total Bases" row that shifts the positional layout).
+  open(my $fh, '<', $infile) or die "Cannot open $infile: $!\n";
+  while (my $line = <$fh>) {
+    $line =~ s/[\r\n]+$//;
+    my ($key, $value) = split(/\t/, $line);
+    if    ($line =~ /^##FastQC/)     { $stat{'FastQC'} = $value; }
+    elsif ($line =~ /^>>END_MODULE/) { $module = ''; }
+    elsif ($line =~ /^>>/)           { $module = $key; $stat{$key} = $value; }
+    elsif ($line =~ /^#/)            { next; }    # column header rows
+    elsif ($module eq '>>Basic Statistics') { $stat{$key} = $value; }
+    elsif ($module eq '>>Per sequence quality scores') { push(@qlines, $line); }
+  }
+  close($fh);
+
+  my @row = map { defined $stat{$_} ? $stat{$_} : '' } ('Filename', 'FastQC',
+    '>>Basic Statistics', 'Total Sequences', 'Sequences flagged as poor quality', '%GC');
+  print join("\t", $inname, @row);
+  foreach my $qs (@qlist) {
+    print "\t", qcal($stat{'Total Sequences'}, $qs, \@qlines);
+  }
+  print "\n";
+}
+
+# qcal($nreads, $cutoff, \@qlines): percentage of reads whose Q score is
+# >= $cutoff. Each @qlines entry is a "quality<TAB>count" row; counts at or
+# above the cutoff are summed and divided by $nreads. Returns a "%.2f" string,
+# "0.00" when $nreads is missing or zero (the division would otherwise die).
+sub qcal {
+   my ($nreads, $cutoff, $qlines) = @_;
+   my $sum = 0;
+
+   foreach my $item (@{$qlines}) {
+      my ($qval, $count) = split(/\t/, $item);
+      next unless defined $count;    # skip blank or malformed rows
+      $sum += $count if $qval >= $cutoff;
+   }
+   return sprintf("%.2f", 0) unless $nreads && $nreads > 0;
+   return sprintf("%.2f", 100 * $sum / $nreads);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sum_fastqc-b769c810924e/sum_fastqc.xml	Wed Oct 17 11:09:06 2018 -0400
@@ -0,0 +1,55 @@
+<tool id="sum_fastqc" name="sum_fastqc" version="0.2">
+    <requirements></requirements>
+    <!-- Arguments are single-quoted so names or lists containing spaces stay one argv entry each. -->
+    <command detect_errors="exit_code"><![CDATA[
+      #if $jobtype.select == "single"
+        #set inname = $jobtype.file
+        #set infile = $jobtype.file
+        perl '$__tool_directory__/sum_fastqc.pl' '$inname' '$qset.qscore' '$infile' > sum_fastqc.tab
+      #else if $jobtype.select == "col"
+        #set inname = $jobtype.coll.name
+        #set infile = $jobtype.coll.forward
+        #set infile2 = $jobtype.coll.reverse
+        perl '$__tool_directory__/sum_fastqc.pl' '$inname' '$qset.qscore' '$infile' '$infile2' > sum_fastqc.tab
+      #end if
+    ]]></command>
+    <inputs>
+      <conditional name="jobtype">
+        <param name="select" type="select" label="Select Input">
+          <option value="single">Raw FASTQC output File</option>
+          <option value="col">Pair of raw FASTQC Files</option>
+        </param>
+        <when value="single">
+          <param name="file" type="data" format="txt" label="Raw FASTQC" />
+        </when>
+        <when value="col">
+          <param name="coll" label="Raw FASTQC pair" type="data_collection" format="txt" collection_type="paired" />
+        </when>
+      </conditional>
+      <conditional name="qset">
+        <param name="selectq" type="select" label="Single or multiple Q scores">
+          <option value="single">Single Q score</option>
+          <option value="mul">Multiple Q scores</option>
+        </param>
+        <when value="single">
+          <param name="qscore" type="integer" label="Q score threshold (i.e. reads >= Q score)" value="30">
+            <validator type="in_range" message="Must be integer(0,40)." min="0" max="40"/>
+          </param>
+        </when>
+        <when value="mul">
+          <param name="qscore" type="text" label="Comma delimited Q score list (e.g. 25,30,35)" value="30">
+            <validator type="regex" message="Use comma separated integers, e.g. 25,30,35.">^\s*\d+(\s*,\s*\d+)*\s*$</validator>
+          </param>
+        </when>
+      </conditional>
+    </inputs>
+    <outputs>
+      <data format="tabular" name="FASTQC Summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="sum_fastqc.tab"/>
+    </outputs>
+    <help><![CDATA[
+Summarizes raw FastQC data files (fastqc_data.txt) as one tab-separated row per file:
+Input, File, FastQC version, Basic Statistics pass/fail, Reads, Poor_Reads, GC, and one
+Q<n> column per threshold giving the percentage of reads with Q score >= n.
+    ]]></help>
+    <citations></citations>
+</tool>