diff fastx_toolkit-0.0.6/scripts/fastx_nucleotide_distribution_graph.sh @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_toolkit-0.0.6/scripts/fastx_nucleotide_distribution_graph.sh	Thu Aug 14 04:52:17 2014 -0400
@@ -0,0 +1,90 @@
+#!/bin/sh
+
+#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
+#    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU Affero General Public License as
+#   published by the Free Software Foundation, either version 3 of the
+#   License, or (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU Affero General Public License for more details.
+#
+#    You should have received a copy of the GNU Affero General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+usage()
+{
+	echo "FASTA/Q Nucleotide Distribution Plotter"
+	echo
+	echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
+	echo
+	echo "  [-p]           - Generate PostScript (.PS) file. Default is PNG image."
+	echo "  [-i INPUT.TXT] - Input file. Should be the output of \"fastx_quality_statistics\" program."
+	echo "  [-o OUTPUT]    - Output file name. default is STDOUT."
+	echo "  [-t TITLE]     - Title - will be plotted on the graph."
+	echo
+	exit 
+}
+
+#
+# Input Data columns: #pos	cnt	min	max	sum       	mean	Q1	med	Q3	IQR	lW	rW A_Count	C_Count	G_Count	T_Count	N_Count
+#  As produced by "fastq_quality_statistics" program
+
+TITLE=""					# default title is empty
+FILENAME=""
+OUTPUTTERM="set term png size 1048,768"		# default output terminal is "PNG"
+OUTPUTFILE="/dev/stdout"   			# Default output file is simply "stdout"
+while getopts ":t:i:o:ph" Option
+	do
+	case $Option in
+		t ) TITLE="for $OPTARG" ;;
+		i ) FILENAME=$OPTARG ;;
+		o ) OUTPUTFILE="$OPTARG" ;;
+		p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
+		h ) usage ;;
+		* ) echo "unrecognized argument. use '-h' for usage information."; exit -1 ;;
+	esac
+done
+shift $(($OPTIND - 1)) 
+
+
+if [ -z "$FILENAME" ]; then
+	usage
+fi
+
+if [ ! -r "$FILENAME" ]; then
+	echo "Error: can't open input file ($1)." >&2
+	exit 1
+fi
+
+GNUPLOTCMD="
+$OUTPUTTERM
+set boxwidth 0.75 absolute
+set size 1,1
+set style fill solid 1.00 border -1
+set xlabel \"read position\"
+set title \"Nucleotides distribution $TITLE\" 
+set ylabel \"% of total (per read position)\" 
+#set grid noxtics nomxtics ytics nomytics noztics nomztics \
+# nox2tics nomx2tics noy2tics nomy2tics nocbtics nomcbtics
+#set grid layerdefault   linetype 0 linewidth 1.000,  linetype 0 linewidth 1.000
+set key outside right top vertical Left reverse enhanced autotitles columnhead nobox
+set key invert samplen 4 spacing 1 width 0 height 0 
+set style histogram rowstacked 
+set style data histograms 
+set noytics
+set xtics 1
+set yrange [ 0.00000 : 100.000 ] noreverse nowriteback
+
+plot '$FILENAME' using (100.*column(13)/column(18)):xtic(1) title \"A\" lt rgb \"#5050ff\", \
+       '' using (100.*column(14)/column(18)) title \"C\" lt rgb \"#e00000\", \
+       '' using (100.*column(15)/column(18)) title \"G\" lt rgb \"#00c000\", \
+       '' using (100.*column(16)/column(18)) title \"T\" lt rgb \"#e6e600\", \
+       '' using (100.*column(17)/column(18)) title \"N\" lt rgb \"pink\"
+"
+
+echo "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"