comparison fastx_toolkit-0.0.6/scripts/fastq_quality_boxplot_graph.sh @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
comparison
equal deleted inserted replaced
2:dfe9332138cf 3:997f5136985f
1 #!/bin/sh
2
3 # FASTX-toolkit - FASTA/FASTQ preprocessing tools.
4 # Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as
8 # published by the Free Software Foundation, either version 3 of the
9 # License, or (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Affero General Public License for more details.
15 #
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
# usage - print the command-line help text to stdout and terminate the script.
#
# Arguments: none.
# Outputs:   the help text on stdout; "$0" is shown as the program name.
# Returns:   never returns; exits the shell with status 0.
#
# Defined with the POSIX "name()" form: the "function" keyword is a
# bash/ksh extension and is rejected by strict /bin/sh implementations.
usage()
{
  echo "Solexa-Quality BoxPlot plotter"
  echo "Generates a solexa quality score box-plot graph "
  echo
  echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
  echo
  echo " [-p] - Generate PostScript (.PS) file. Default is PNG image."
  echo " [-i INPUT.TXT] - Input file. Should be the output of \"solexa_quality_statistics\" program."
  echo " [-o OUTPUT] - Output file name. default is STDOUT."
  echo " [-t TITLE] - Title (usually the solexa file name) - will be plotted on the graph."
  echo
  # Explicit status: a bare "exit" would silently reuse the status of the
  # last echo, which is the same value (0) but only by accident.
  exit 0
}
33
34 #
35 # Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
36 # As produced by "solexa_quality_statistics" program
37
# ---------------------------------------------------------------------------
# Defaults; each may be overridden by a command-line option below.
# ---------------------------------------------------------------------------
TITLE=""                                 # default title is empty
FILENAME=""                              # input statistics file (-i); mandatory
OUTPUTTERM="set term png size 2048,768"  # default output terminal is "PNG"
OUTPUTFILE="/dev/stdout"                 # default output file is simply "stdout"

# The leading ':' in the option string puts getopts in silent mode, so both
# unknown options and options missing their argument fall into the '*' arm.
while getopts ":t:i:o:ph" Option
do
  case "$Option" in
    t ) TITLE="for $OPTARG" ;;
    i ) FILENAME="$OPTARG" ;;
    o ) OUTPUTFILE="$OPTARG" ;;
    p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
    h ) usage ;;
    * ) # Diagnostics go to stderr: stdout is the default image stream.
        # "exit -1" is not a valid POSIX exit status, so plain 1 is used.
        echo "unrecognized argument. use '-h' for usage information." >&2
        exit 1 ;;
  esac
done
shift $(($OPTIND - 1))

# An input file is mandatory; without one, show the help text (usage exits).
# "-z" is used because "==" is not a POSIX test operator under /bin/sh.
if [ -z "$FILENAME" ]; then
  usage
fi

if [ ! -r "$FILENAME" ]; then
  # Report the file that was actually tested; "$1" no longer refers to it
  # after the shift above.
  echo "Error: can't open input file ($FILENAME)." >&2
  exit 1
fi

#Read number of cycles from the stats file (each line is a cycle, minus the header line)
#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
NUM_CYCLES=$(wc -l < "$FILENAME")

# Build the gnuplot program.
# Both plots use the "candlesticks" style, whose columns are
#   x : box_min : whisker_min : whisker_high : box_high
# With the input layout (pos cnt min max sum mean Q1 med Q3 IQR lW rW ...):
#   1:7:11:12:9 -> position, Q1, left whisker, right whisker, Q3
#   1:8:8:8:8   -> a zero-height box drawn at the median
# The backslash-newline inside the double-quoted string is removed by the
# shell, so gnuplot receives the two-part "plot" command as a single line.
# NOTE(review): TITLE and FILENAME are pasted into the gnuplot program
# verbatim; values containing quote characters will break the generated
# commands.
GNUPLOTCMD="
$OUTPUTTERM
set boxwidth 0.8
set size 1,1
set key Left inside
set xlabel \"read position\"
set ylabel \"Quality Score (Solexa Scale: 40=Highest, -15=Lowest)\"
set title \"Quality Scores $TITLE\"
#set auto x
set bars 4.0
set xrange [ 0: $NUM_CYCLES ]
set yrange [-15:45]
set y2range [-15:45]
set xtics 1
set x2tics 1
set ytics 2
set y2tics 2
set tics out
set grid ytics
set style fill empty
plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \
'' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
"

# printf instead of echo: some sh implementations make echo interpret
# backslash sequences, which could corrupt a user-supplied title.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"