Mercurial > repos > estrain > sum_fastqc
changeset 0:17652c6a7517 draft
Uploaded
author | estrain |
---|---|
date | Wed, 17 Oct 2018 11:09:06 -0400 |
parents | |
children | 7d62c324d642 |
files | sum_fastqc-b769c810924e/sum_fastqc.pl sum_fastqc-b769c810924e/sum_fastqc.xml |
diffstat | 2 files changed, 134 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl

####################################################
##
## sum_fastqc.pl
##
## Errol Strain (estrain@gmail.com)
##
## Description: Takes raw FASTQC output and produces
## simple table summary
##
## Usage:
##   sum_fastqc.pl <label> <q1[,q2,...]> <fastqc_data.txt> [<fastqc_data.txt> ...]
##
## Output: tab-delimited table on STDOUT, one header row
## followed by one row per input file.
##
## Source: sum_fastqc-b769c810924e/sum_fastqc.pl
## (changeset 0:17652c6a7517)
##
####################################################

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# Label printed in the first column of every row.
my $inname = shift(@ARGV);
$inname = '' unless defined $inname;

# Comma-delimited list of Q-score cutoffs, e.g. "25,30,35".  Whitespace is
# ignored and empty entries (",30" or "25,,30") are dropped so they do not
# turn into nameless "Q" columns.
my $qscore = shift(@ARGV);
$qscore = '' unless defined $qscore;
$qscore =~ s/\s+//g;
my @qlist = grep { length } split(/,/, $qscore);

# Header row: seven fixed columns, then one "Q<cutoff>" column per cutoff.
print join("\t",
    qw(Input File FastQC Pass-Fail Reads Poor_Reads GC),
    map { "Q$_" } @qlist), "\n";

# Every remaining argument is a raw FastQC report; emit one row for each.
for my $report (@ARGV) {
    print_stats($report);
}

# print_stats($infile)
#
# Reads one raw FastQC report and prints a single tab-delimited row:
#   label, Filename, FastQC version, Basic Statistics status,
#   Total Sequences, Sequences flagged as poor quality, %GC,
#   then qcal() for each cutoff in @qlist.
#
# The report is parsed in Perl rather than through head/awk/tail so that
# file names containing spaces or shell metacharacters are safe and no GNU
# specific options (head -n -1) are needed.
#
# Basic Statistics values are looked up by measure name instead of by line
# number, so a report that carries additional rows in that module still
# fills the right columns.
#
# Dies if $infile cannot be opened.  Fields that are absent from the report
# are printed as empty strings.
sub print_stats {
    my ($infile) = @_;

    open(my $fh, '<', $infile)
        or die "sum_fastqc.pl: cannot open '$infile': $!\n";

    my @fastqc;          # fields of the "##FastQC<TAB>version" line
    my @pass;            # fields of the ">>Basic Statistics<TAB>status" line
    my %basic;           # measure name => value from Basic Statistics
    my @qlines;          # "quality<TAB>count" rows of the per-sequence module
    my $section = '';    # which module body we are currently inside

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;    # tolerate CRLF reports as well

        if ($line =~ /^##FastQC\t/) {
            @fastqc = split(/\t/, $line);
        }
        elsif ($line =~ /^>>END_MODULE/) {
            $section = '';
        }
        elsif ($line =~ /^>>Basic Statistics(?:\t|$)/) {
            @pass    = split(/\t/, $line);
            $section = 'basic';
        }
        elsif ($line =~ /^>>/) {
            # Start of some other module; its rows are only collected once
            # the "#Quality<TAB>Count" column header is seen.
            $section = '';
        }
        elsif ($line =~ /^#Quality\tCount/) {
            $section = 'quality';
        }
        elsif ($line =~ /^#/) {
            next;                # column header of a module we do not use
        }
        elsif ($section eq 'basic') {
            my ($measure, $value) = split(/\t/, $line);
            $basic{$measure} = $value if defined $measure;
        }
        elsif ($section eq 'quality') {
            push @qlines, $line;
        }
    }
    close($fh);

    my $nreads = $basic{'Total Sequences'};

    my @row = (
        $inname,
        $basic{'Filename'},
        $fastqc[1],
        $pass[1],
        $nreads,
        $basic{'Sequences flagged as poor quality'},
        $basic{'%GC'},
    );
    push @row, map { qcal($nreads, $_, \@qlines) } @qlist;

    print join("\t", map { defined $_ ? $_ : '' } @row), "\n";
    return;
}

# qcal($nreads, $cutoff, \@qlines)
#
# Sum reads w/ Q scores >= cutoff and divide by number of reads.
#
#   $nreads  total number of reads in the report
#   $cutoff  Q-score threshold; a non-numeric cutoff is treated as 0
#   \@qlines reference to "quality<TAB>count" strings
#
# Returns the percentage formatted with two decimals (e.g. "90.00").
# Returns "0.00" when $nreads is missing, non-numeric or not positive, so an
# empty report yields a row instead of a division-by-zero crash.  Rows that
# are not two numeric fields are ignored.
sub qcal {
    my ($nreads, $cutoff, $qrows) = @_;

    return '0.00'
        unless defined $nreads && looks_like_number($nreads) && $nreads > 0;

    $cutoff = 0 unless defined $cutoff && looks_like_number($cutoff);

    my $sum = 0;
    for my $row (@{ $qrows || [] }) {
        my ($qval, $count) = split(/\t/, $row);
        next unless defined $qval  && looks_like_number($qval);
        next unless defined $count && looks_like_number($count);
        $sum += $count if $qval >= $cutoff;
    }

    return sprintf('%.2f', 100 * $sum / $nreads);
}

1;
<tool id="sum_fastqc" name="sum_fastqc" version="0.2">
    <!--
        Galaxy wrapper for sum_fastqc.pl
        (sum_fastqc-b769c810924e/sum_fastqc.xml, changeset 0:17652c6a7517).

        Runs sum_fastqc.pl on one raw FastQC report, or on the forward and
        reverse reports of a paired collection, and collects the
        tab-delimited summary the script writes to sum_fastqc.tab.
    -->
    <requirements>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[

        ## Arguments passed to sum_fastqc.pl: row label, Q-score cutoff list,
        ## then one (single) or two (paired) raw FastQC report paths.
        ## Every substitution is single-quoted so the shell treats it as
        ## exactly one argument.
        #if $jobtype.select == "single"
            #set inname = $jobtype.file
            #set infile = $jobtype.file
            perl '$__tool_directory__/sum_fastqc.pl' '$inname' '$qset.qscore' '$infile' > sum_fastqc.tab
        #else if $jobtype.select == "col"
            #set inname = $jobtype.coll.name
            #set infile = $jobtype.coll.forward
            #set infile2 = $jobtype.coll.reverse
            perl '$__tool_directory__/sum_fastqc.pl' '$inname' '$qset.qscore' '$infile' '$infile2' > sum_fastqc.tab
        #end if

    ]]></command>
    <inputs>
        <!-- Input: either a single raw FastQC report or a paired collection of two reports. -->
        <conditional name="jobtype">
            <param name="select" type="select" label="Select Input">
                <option value="single">Raw FASTQC output File</option>
                <option value="col">Pair of raw FASTQC Files</option>
            </param>
            <when value="single">
                <param name="file" type="data" format="txt" label="Raw FASTQC" />
            </when>
            <when value="col">
                <param name="coll" label="Raw FASTQC pair" type="data_collection" format="txt" collection_type="paired" />
            </when>
        </conditional>
        <!-- Q-score cutoffs: one integer, or a comma-delimited list of integers. -->
        <conditional name="qset">
            <param name="selectq" type="select" label="Single or multiple Q scores">
                <option value="single">Single Q score</option>
                <option value="mul">Multiple Q scores</option>
            </param>
            <when value="single">
                <param name="qscore" type="integer" label="Q score threshold (i.e. reads >= Q score)" value="30">
                    <validator type="in_range" message="Must be integer(0,40)." min="0" max="40"/>
                </param>
            </when>
            <when value="mul">
                <param name="qscore" type="text" label="Comma delimited Q score list (e.g. 25,30,35)" value="30">
                    <!-- Only digits, commas and optional whitespace reach the command line. -->
                    <validator type="regex" message="Must be a comma delimited list of integers, e.g. 25,30,35.">^\s*\d+(\s*,\s*\d+)*\s*$</validator>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The command always writes sum_fastqc.tab, so name that file explicitly instead of a glob. -->
        <data format="tabular" name="FASTQC Summary" label="${tool.name} on ${on_string}: FastQC summary" from_work_dir="sum_fastqc.tab"/>
    </outputs>

    <help><![CDATA[

**What it does**

Takes raw FastQC output and produces a simple tab-delimited summary table,
with one row per FastQC report.

**Inputs**

- A raw FastQC report, or a paired collection holding the forward and
  reverse reports.
- One Q score threshold, or a comma delimited list of thresholds
  (e.g. 25,30,35).

**Output columns**

- *Input*: label of the input dataset or collection
- *File*: file name recorded in the FastQC report
- *FastQC*: FastQC version
- *Pass-Fail*: status of the Basic Statistics module
- *Reads*: total number of sequences
- *Poor_Reads*: sequences flagged as poor quality
- *GC*: %GC
- *Q<n>*: one column per threshold, the percentage of reads whose
  sequence-level quality score is >= n

    ]]></help>
    <citations>
    </citations>
</tool>