Mercurial > repos > xilinxu > xilinxu
diff fastx_toolkit-0.0.6/scripts/fastq_quality_boxplot_graph.sh @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_toolkit-0.0.6/scripts/fastq_quality_boxplot_graph.sh Thu Aug 14 04:52:17 2014 -0400 @@ -0,0 +1,94 @@ +#!/bin/sh + +# FASTX-toolkit - FASTA/FASTQ preprocessing tools. +# Copyright (C) 2009 A. Gordon (gordon@cshl.edu) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +function usage() +{ + echo "Solexa-Quality BoxPlot plotter" + echo "Generates a solexa quality score box-plot graph " + echo + echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]" + echo + echo " [-p] - Generate PostScript (.PS) file. Default is PNG image." + echo " [-i INPUT.TXT] - Input file. Should be the output of \"solexa_quality_statistics\" program." + echo " [-o OUTPUT] - Output file name. default is STDOUT." + echo " [-t TITLE] - Title (usually the solexa file name) - will be plotted on the graph." + echo + exit +} + +# +# Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count +# As produced by "solexa_quality_statistics" program + +TITLE="" # default title is empty +FILENAME="" +OUTPUTTERM="set term png size 2048,768" # default output terminal is "PNG" +OUTPUTFILE="/dev/stdout" # Default output file is simply "stdout" +while getopts ":t:i:o:ph" Option + do + case $Option in + # w ) CMD=$OPTARG; FILENAME="PIMSLogList.txt"; TARGET="logfiles"; ;; + t ) TITLE="for $OPTARG" ;; + i ) FILENAME=$OPTARG ;; + o ) OUTPUTFILE="$OPTARG" ;; + p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;; + h ) usage ;; + * ) echo "unrecognized argument. use '-h' for usage information."; exit -1 ;; + esac +done +shift $(($OPTIND - 1)) + + +if [ "$FILENAME" == "" ]; then + usage +fi + +if [ ! -r "$FILENAME" ]; then + echo "Error: can't open input file ($1)." >&2 + exit 1 +fi + +#Read number of cycles from the stats file (each line is a cycle, minus the header line) +#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now. +NUM_CYCLES=$(cat "$FILENAME" | wc -l) + +GNUPLOTCMD=" +$OUTPUTTERM +set boxwidth 0.8 +set size 1,1 +set key Left inside +set xlabel \"read position\" +set ylabel \"Quality Score (Solexa Scale: 40=Highest, -15=Lowest)\" +set title \"Quality Scores $TITLE\" +#set auto x +set bars 4.0 +set xrange [ 0: $NUM_CYCLES ] +set yrange [-15:45] +set y2range [-15:45] +set xtics 1 +set x2tics 1 +set ytics 2 +set y2tics 2 +set tics out +set grid ytics +set style fill empty +plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \ + '' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians' +" + +echo "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"