Mercurial > repos > estrain > sum_fastqc
comparison sum_fastqc.pl @ 5:7df018757d26 draft
Added additional fields, max %N sites and average length
author | estrain |
---|---|
date | Thu, 18 Oct 2018 17:14:47 -0400 |
parents | d47775122e78 |
children | 53bfb3b2c026 |
Comparison legend: equal, deleted, inserted, replaced
4:d47775122e78 | 5:7df018757d26 |
---|---|
14 my($inname)=shift(@ARGV); | 14 my($inname)=shift(@ARGV); |
15 my($qscore)=shift(@ARGV); | 15 my($qscore)=shift(@ARGV); |
16 $qscore=~s/\s+//g; | 16 $qscore=~s/\s+//g; |
17 my(@qlist)=split(/\,/,$qscore); | 17 my(@qlist)=split(/\,/,$qscore); |
18 | 18 |
19 print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\tMeanQ"; | 19 print "Input\tFile\tFastQC\tPass-Fail\tReads\tPoor_Reads\tGC\%\tMaxN\%\tMeanLen\tMeanQ"; |
20 foreach(@qlist) { | 20 foreach(@qlist) { |
21 print "\tQ".$_; | 21 print "\tQ".$_; |
22 } | 22 } |
23 print "\n"; | 23 print "\n"; |
24 | 24 |
34 | 34 |
35 # Sequence level Q scores are buried in the middle of the file | 35 # Sequence level Q scores are buried in the middle of the file |
36 @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; | 36 @qlines=`awk '/#Quality\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; |
37 chomp(@qlines); | 37 chomp(@qlines); |
38 | 38 |
| | 39 @nlines=`awk '/#Base\tN\-Count/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; |
| | 40 chomp(@nlines); |
| | 41 |
| | 42 @lenlines=`awk '/#Length\tCount/,/>>END_MODULE/' $infile | head -n -1 | tail -n +2`; |
| | 43 chomp(@lenlines); |
| | 44 |
39 @fastqc = split(/[\n\t]/,shift(@sumlines)); | 45 @fastqc = split(/[\n\t]/,shift(@sumlines)); |
40 @pass = split(/\t/,shift(@sumlines)); | 46 @pass = split(/\t/,shift(@sumlines)); |
41 shift(@sumlines); | 47 shift(@sumlines); |
42 @fn = split(/\t/,shift(@sumlines)); | 48 @fn = split(/\t/,shift(@sumlines)); |
43 shift(@sumlines); | 49 shift(@sumlines); |
52 print $fastqc[1]."\t"; | 58 print $fastqc[1]."\t"; |
53 print $pass[1]."\t"; | 59 print $pass[1]."\t"; |
54 print $nreads[1]."\t"; | 60 print $nreads[1]."\t"; |
55 print $npoor[1]."\t"; | 61 print $npoor[1]."\t"; |
56 print $gc[1]."\t"; | 62 print $gc[1]."\t"; |
| | 63 print maxn(\@nlines)."\t"; |
| | 64 print meanlen($nreads[1],\@lenlines)."\t"; |
57 print readmean($nreads[1],\@qlines); | 65 print readmean($nreads[1],\@qlines); |
58 foreach $qs (@qlist) { | 66 foreach $qs (@qlist) { |
59 print "\t"; | 67 print "\t"; |
60 print qcal($nreads[1],$qs,\@qlines); | 68 print qcal($nreads[1],$qs,\@qlines); |
61 } | 69 } |
68 $cutoff=shift(@_); | 76 $cutoff=shift(@_); |
69 @qarray=@{$_[0]}; | 77 @qarray=@{$_[0]}; |
70 $sum = 0; | 78 $sum = 0; |
71 | 79 |
72 foreach $item (@qarray) { | 80 foreach $item (@qarray) { |
73 my($qval,$q)=split(/\t/,$item); | 81 my($qval,$q)=split(/\t/,$item); |
74 if($qval>=$cutoff) { | 82 if($qval>=$cutoff) { |
75 $sum += $q; | 83 $sum += $q; |
76 } | 84 } |
77 } | 85 } |
78 $qmean = sprintf("%.2f", 100 * $sum / $nreads); | 86 $qmean = sprintf("%.2f", 100 * $sum / $nreads); |
79 return $qmean; | 87 return $qmean; |
80 } | 88 } |
81 | 89 |
90 } | 98 } |
91 | 99 |
92 $readq = sprintf("%.2f", $sum / $nreads); | 100 $readq = sprintf("%.2f", $sum / $nreads); |
93 return $readq; | 101 return $readq; |
94 } | 102 } |
| | 103 |
| | 104 sub maxn { |
| | 105 @narray=@{$_[0]}; |
| | 106 my($max_nval)=0; |
| | 107 |
| | 108 foreach $item (@narray) { |
| | 109 my($plist,$nval)=split(/\t/,$item); |
| | 110 if($nval>$max_nval) { |
| | 111 $max_nval=$nval; |
| | 112 } |
| | 113 } |
| | 114 $max_nval = sprintf("%.2f", 100*$max_nval); |
| | 115 return $max_nval; |
| | 116 } |
| | 117 |
| | 118 sub meanlen { |
| | 119 $nreads=shift(@_); |
| | 120 @larray=@{$_[0]}; |
| | 121 my($sum) = 0; |
| | 122 |
| | 123 foreach $item (@larray) { |
| | 124 my($lenrange,$count)=split(/\t/,$item); |
| | 125 my($l1,$l2)=split(/\-/,$lenrange); |
| | 126 $sum+=(($l1+$l2)/2)*$count; |
| | 127 } |
| | 128 $sum = sprintf("%.1f",$sum/$nreads); |
| | 129 return $sum; |
| | 130 } |