Mercurial > repos > estrain > sum_fastqc
changeset 1:7d62c324d642 draft
Deleted selected files
author | estrain |
---|---|
date | Wed, 17 Oct 2018 11:09:20 -0400 |
parents | 17652c6a7517 |
children | bc939b04bb12 |
files | sum_fastqc-b769c810924e/sum_fastqc.pl sum_fastqc-b769c810924e/sum_fastqc.xml |
diffstat | 2 files changed, 0 insertions(+), 134 deletions(-) [+] |
line wrap: on
line diff
--- a/sum_fastqc-b769c810924e/sum_fastqc.pl Wed Oct 17 11:09:06 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-#!/usr/bin/perl
-
-####################################################
-##
-## sum_fastqc.pl
-##
-## Errol Strain (estrain@gmail.com)
-##
-## Description: Takes raw FASTQC output and produces
-## simple table summary
-##
-####################################################
-
-my($inname)=shift(@ARGV);
-my($qscore)=shift(@ARGV);
-$qscore=~s/\s+//g;
-my(@qlist)=split(/\,/,$qscore);
-
-print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC";
-foreach(@qlist) {
- print "\tQ".$_;
-}
-print "\n";
-
-foreach (@ARGV) {
- print_stats($_);
-}
-
-sub print_stats {
- $infile = shift;
- # First 10 lines of raw FASTQC contain basic overview
- @sumlines=`head -n 10 $infile`;
- chomp(@sumlines);
-
- # Sequence level Q scores are buried in the middle of the file
- @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`;
- chomp(@qlines);
-
- @fastqc = split(/[\n\t]/,shift(@sumlines));
- @pass = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- @fn = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- shift(@sumlines);
- @nreads = split(/\t/,shift(@sumlines));
- @npoor = split(/\t/,shift(@sumlines));
- shift(@sumlines);
- @gc = split(/\t/,shift(@sumlines));
-
- print $inname."\t";
- print $fn[1]."\t";
- print $fastqc[1]."\t";
- print $pass[1]."\t";
- print $nreads[1]."\t";
- print $npoor[1]."\t";
- print $gc[1];
- foreach $qs (@qlist) {
- print "\t";
- print qcal($nreads[1],$qs,\@qlines);
- }
- print "\n";
-}
-
-# Sum reads w/ Q scores > cutoff and divide by number of reads
-sub qcal {
- $nreads=shift(@_);
- $cutoff=shift(@_);
- @qarray=@{$_[0]};
- $sum = 0;
-
- foreach $item (@qarray) {
- my($qval,$q)=split(/\t/,$item);
- if($qval>=$cutoff) {
- $sum += $q;
- }
- }
- $qmean = sprintf("%.2f", 100 * $sum / $nreads);
- return $qmean;
-}
--- a/sum_fastqc-b769c810924e/sum_fastqc.xml Wed Oct 17 11:09:06 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<tool id="sum_fastqc" name="sum_fastqc" version="0.2">
- <requirements>
- </requirements>
- <command detect_errors="exit_code"><![CDATA[
-
- #if $jobtype.select == "single"
- #set inname = $jobtype.file
- #set infile = $jobtype.file
- perl $__tool_directory__/sum_fastqc.pl "$inname" $qset.qscore $infile > sum_fastqc.tab
- #else if $jobtype.select == "col"
- #set inname = $jobtype.coll.name
- #set infile = $jobtype.coll.forward
- #set infile2 = $jobtype.coll.reverse
- perl $__tool_directory__/sum_fastqc.pl "$inname" $qset.qscore $infile $infile2 > sum_fastqc.tab;
- #end if
-
- ]]></command>
- <inputs>
- <conditional name="jobtype">
- <param name="select" type="select" label="Select Input">
- <option value="single">Raw FASTQC output File</option>
- <option value="col">Pair of raw FASTQC Files</option>
- </param>
- <when value="single">
- <param name="file" type="data" format="txt" label="Raw FASTQC" />
- </when>
- <when value="col">
- <param name="coll" label="Raw FASTQC pair" type="data_collection" format="txt" collection_type="paired" />
- </when>
- </conditional>
- <conditional name="qset">
- <param name="selectq" type="select" label="Single or multiple Q scores">
- <option value="single">Single Q score</option>
- <option value="mul">Multiple Q scores</option>
- </param>
- <when value="single">
- <param name="qscore" type="integer" label="Q score threshold (i.e. reads >= Q score)" value="30">
- <validator type="in_range" message="Must be integer(0,40)." min="0" max="40"/>
- </param>
- </when>
- <when value="mul">
- <param name="qscore" type="text" label="Comma delimited Q score list (e.g. 25,30,35)" value="30"/>
- </when>
- </conditional>
- </inputs>
- <outputs>
- <data format="tabular" name="FASTQC Summary" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.tab"/>
- </outputs>
-
- <help><![CDATA[
-
- ]]></help>
- <citations>
- </citations>
-</tool>