diff pal_finder_wrapper.sh @ 0:3f908e7fff4f draft

Uploaded first version to toolshed.
author pjbriggs
date Thu, 11 Dec 2014 09:23:24 -0500
parents
children 771ebe02636f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal_finder_wrapper.sh	Thu Dec 11 09:23:24 2014 -0500
@@ -0,0 +1,334 @@
+#!/bin/sh
+#
+# pal_finder_wrapper.sh: run pal_finder perl script as a Galaxy tool
+#
+# Usage: run_palfinder.sh FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY FILTERED_MICROSATS [OPTIONS]
+#        run_palfinder.sh --454    FASTA    MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]
+#
+# Options:
+#
+# --primer-prefix PREFIX: prefix added to the beginning of all primer names (prPrefixName)
+# --2merMinReps N: miniumum number of 2-mer repeat units to detect (0=ignore units of this size)
+# --3merMinReps N
+# --4merMinReps N
+# --5merMinReps N
+# --6merMinReps N
+# --primer-mispriming-library FASTA: specify a Fasta file with sequences to avoid amplifying
+# --primer-opt-size VALUE: optimum primer length
+# --primer-min-size VALUE: minimum acceptable primer length
+# --primer-max-size VALUE: maximum acceptable primer length
+# --primer-min-gc VALUE: minimum allowable percentage of Gs and Cs in any primer
+# --primer-max-gc VALUE: maximum allowable percentage of Gs and Cs
+# --primer-gc-clamp VALUE: number of consecutive Gs and Cs at 3' end of both left and right primer
+# --primer-max-end-gc VALUE: max number of Gs or Cs in last five 3' bases of left or right primer
+# --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo
+# --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius)
+# --primer-opt-tm VALUE: optimum melting temperature (Celsius)
+# --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
+# --output_config_file FNAME: write a copy of the config.txt file to FNAME
+# --filter_microsats FNAME: run Graeme Fox's Perl script to filter and sort the
+#                           microsatellites from pal_finder and write to FNAME
+#
+# pal_finder is available from http://sourceforge.net/projects/palfinder/
+#
+# primer3 is available from http://primer3.sourceforge.net/releases.php
+# (nb needs version 2.0.0-alpha)
+#
+# Explicitly set the locations of the pal_finder script, data files and the primer3
+# executable by setting the following variables in the environment:
+#
+#  * PALFINDER_SCRIPT_DIR: location of the pal_finder Perl script (defaults to
+#    /usr/bin)
+#  * PALFINDER_DATA_DIR: location of the pal_finder data files (specifically
+#    config.txt and simple.ref; defaults to /usr/share/pal_finder_v0.02.04)
+#  * PRIMER3_CORE_EXE: name of the primer3_core program, which should include the
+#    full path if it's not on the Galaxy user's PATH (defaults to primer3_core)
+#
+echo $*
+#
+# Initialise locations of scripts, data and executables
+#
+# Set these in the environment to overide at execution time
+: ${PALFINDER_SCRIPT_DIR:=/usr/bin}
+: ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04}
+: ${PRIMER3_CORE_EXE:=primer3_core}
+#
+# Filter script is in the same directory as this script
+PALFINDER_FILTER_PL=$(dirname $0)/pal_finder_filter.pl
+if [ ! -f $PALFINDER_FILTER_PL ] ; then
+    echo No pal_finder_filter.pl script >&2
+    exit 1
+fi
+#
+# Check that we have all the components
+function have_program() {
+    local program=$1
+    local got_program=$(which $program 2>&1 | grep "no $(basename $program) in")
+    if [ -z "$got_program" ] ; then
+	echo yes
+    else
+	echo no
+    fi	
+}
+if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then
+    echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
+    exit 1
+fi
+if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
+    echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
+    exit 1
+fi
+if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
+    echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
+    exit 1
+fi
+#
+# Initialise parameters used in the config.txt file
+PRIMER_PREFIX="test"
+MIN_2_MER_REPS=6
+MIN_3_MER_REPS=0
+MIN_4_MER_REPS=0
+MIN_5_MER_REPS=0
+MIN_6_MER_REPS=0
+PRIMER_MISPRIMING_LIBRARY=$PALFINDER_DATA_DIR/simple.ref
+PRIMER_OPT_SIZE=
+PRIMER_MAX_SIZE=
+PRIMER_MIN_SIZE=
+PRIMER_MAX_GC=
+PRIMER_MIN_GC=
+PRIMER_GC_CLAMP=
+PRIMER_MAX_END_GC=
+PRIMER_OPT_TM=
+PRIMER_MAX_TM=
+PRIMER_MIN_TM=
+PRIMER_PAIR_MAX_DIFF_TM=
+OUTPUT_CONFIG_FILE=
+FILTERED_MICROSATS=
+#
+# Collect command line arguments
+if [ $# -lt 2 ] ; then
+  echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
+  echo "       $0 --454    FASTA    MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
+  exit
+fi
+if [ "$1" == "--454" ] ; then
+    PLATFORM="454"
+    FNA=$2
+else
+    PLATFORM="Illumina"
+    FASTQ_R1=$1
+    FASTQ_R2=$2
+fi
+MICROSAT_SUMMARY=$3
+PAL_SUMMARY=$4
+shift; shift; shift; shift
+#
+# Collect command line options
+while [ ! -z "$1" ] ; do
+    case "$1" in
+	--primer-prefix)
+	    shift
+	    PRIMER_PREFIX=$1
+	    ;;
+	--2merMinReps)
+	    shift
+	    MIN_2_MER_REPS=$1
+	    ;;
+	--3merMinReps)
+	    shift
+	    MIN_3_MER_REPS=$1
+	    ;;
+	--4merMinReps)
+	    shift
+	    MIN_4_MER_REPS=$1
+	    ;;
+	--5merMinReps)
+	    shift
+	    MIN_5_MER_REPS=$1
+	    ;;
+	--6merMinReps)
+	    shift
+	    MIN_6_MER_REPS=$1
+	    ;;
+	--primer-mispriming-library)
+	    shift
+	    PRIMER_MISPRIMING_LIBRARY=$1
+	    ;;
+	--primer-opt-size)
+	    shift
+	    PRIMER_OPT_SIZE=$1
+	    ;;
+	--primer-max-size)
+	    shift
+	    PRIMER_MAX_SIZE=$1
+	    ;;
+	--primer-min-size)
+	    shift
+	    PRIMER_MIN_SIZE=$1
+	    ;;
+	--primer-max-gc)
+	    shift
+	    PRIMER_MAX_GC=$1
+	    ;;
+	--primer-min-gc)
+	    shift
+	    PRIMER_MIN_GC=$1
+	    ;;
+	--primer-gc-clamp)
+	    shift
+	    PRIMER_GC_CLAMP=$1
+	    ;;
+	--primer-max-end-gc)
+	    shift
+	    PRIMER_MAX_END_GC=$1
+	    ;;
+	--primer-opt-tm)
+	    shift
+	    PRIMER_OPT_TM=$1
+	    ;;
+	--primer-max-tm)
+	    shift
+	    PRIMER_MAX_TM=$1
+	    ;;
+	--primer-min-tm)
+	    shift
+	    PRIMER_MIN_TM=$1
+	    ;;
+	--primer-pair-max-diff-tm)
+	    shift
+	    PRIMER_PAIR_MAX_DIFF_TM=$1
+	    ;;
+	--output_config_file)
+	    shift
+	    OUTPUT_CONFIG_FILE=$1
+	    ;;
+	--filter_microsats)
+	    shift
+	    FILTERED_MICROSATS=$1
+	    ;;
+	*)
+	    echo Unknown option: $1 >&2
+	    exit 1
+	    ;;
+    esac
+    shift
+done
+#
+# Check that primer3_core is available
+got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"`
+if [ -z "$got_primer3" ] ; then
+  echo ERROR primer3_core not found >&2
+  exit 1
+fi
+#
+# Set up the working dir
+if [ "$PLATFORM" == "Illumina" ] ; then
+    # Paired end Illumina data as input
+    if [ $FASTQ_R1 == $FASTQ_R2 ] ; then
+	echo ERROR R1 and R2 fastqs are the same file >&2
+	exit 1
+    fi
+    ln -s $FASTQ_R1
+    ln -s $FASTQ_R2
+    fastq_r1=$(basename $FASTQ_R1)
+    fastq_r2=$(basename $FASTQ_R2)
+else
+    # 454 data as input
+    ln -s $FNA
+    fna=$(basename $FNA)
+fi
+ln -s $PRIMER_MISPRIMING_LIBRARY
+PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY)
+mkdir Output
+#
+# Copy in the default config.txt file
+/bin/cp $PALFINDER_DATA_DIR/config.txt .
+#
+# Update the config.txt file with new values
+function set_config_value() {
+    local key=$1
+    local value=$2
+    local config_txt=$3
+    if [ -z "$value" ] ; then
+	echo "No value for $key, left as default"
+    else
+	echo Setting "$key" to "$value"
+	sed -i 's,^'"$key"' .*,'"$key"'  '"$value"',' $config_txt
+    fi
+}
+# Input files
+set_config_value platform $PLATFORM config.txt
+if [ "$PLATFORM" == "Illumina" ] ; then
+    set_config_value inputFormat fastq config.txt
+    set_config_value pairedEnd 1 config.txt
+    set_config_value inputReadFile $fastq_r1 config.txt
+    set_config_value pairedReadFile $fastq_r2 config.txt
+else
+    set_config_value inputFormat fasta config.txt
+    set_config_value pairedEnd 0 config.txt
+    set_config_value input454reads $fna config.txt
+fi
+# Output files
+set_config_value MicrosatSumOut Output/microsat_summary.txt config.txt
+set_config_value PALsummaryOut Output/PAL_summary.txt config.txt
+# Microsat info
+set_config_value 2merMinReps $MIN_2_MER_REPS config.txt
+set_config_value 3merMinReps $MIN_3_MER_REPS config.txt
+set_config_value 4merMinReps $MIN_4_MER_REPS config.txt
+set_config_value 5merMinReps $MIN_5_MER_REPS config.txt
+set_config_value 6merMinReps $MIN_6_MER_REPS config.txt
+# Primer3 settings
+set_config_value primer3input Output/pr3in.txt config.txt
+set_config_value primer3output Output/pr3out.txt config.txt
+set_config_value primer3executable $PRIMER3_CORE_EXE config.txt
+set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt
+set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
+set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt
+set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt
+set_config_value PRIMER_MAX_SIZE "$PRIMER_MAX_SIZE" config.txt
+set_config_value PRIMER_MIN_GC "$PRIMER_MIN_GC" config.txt
+set_config_value PRIMER_MAX_GC "$PRIMER_MAX_GC" config.txt
+set_config_value PRIMER_GC_CLAMP "$PRIMER_GC_CLAMP" config.txt
+set_config_value PRIMER_MAX_END_GC "$PRIMER_MAX_END_GC" config.txt
+set_config_value PRIMER_MIN_TM "$PRIMER_MIN_TM" config.txt
+set_config_value PRIMER_MAX_TM "$PRIMER_MAX_TM" config.txt
+set_config_value PRIMER_OPT_TM "$PRIMER_OPT_TM" config.txt
+set_config_value PRIMER_PAIR_MAX_DIFF_TM "$PRIMER_PAIR_MAX_DIFF_TM" config.txt
+#
+# Run pal_finder
+perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 2>&1 | tee pal_finder.log
+#
+# Check that log ends with "Done!!" message
+if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
+    echo ERROR pal_finder failed to complete successfully >&2
+    exit 1
+fi
+#
+# Run the pal_finder_filter.pl script from Graeme Fox
+if [ ! -z "$FILTERED_MICROSATS" ] ; then
+    perl $PALFINDER_FILTER_PL Output/PAL_summary.txt 2>&1
+    if [ $? -ne 0 ] ; then
+	echo ERROR pal_finder_filter.pl exited with non-zero status >&2
+	exit 1
+    elif [ ! -f pal_finder_filter_output.txt ] ; then
+	echo ERROR no output from pal_finder_filter.pl >&2
+	exit 1
+    fi
+fi
+#
+# Clean up
+if [ -f Output/microsat_summary.txt ] ; then
+    /bin/mv Output/microsat_summary.txt $MICROSAT_SUMMARY
+fi
+if [ -f Output/PAL_summary.txt ] ; then
+    /bin/mv Output/PAL_summary.txt $PAL_SUMMARY
+fi
+if [ ! -z "$FILTERED_MICROSATS" ] && [ -f pal_finder_filter_output.txt ] ; then
+    echo Moving pal_finder_filter_output.txt to $FILTERED_MICROSATS
+    /bin/mv pal_finder_filter_output.txt $FILTERED_MICROSATS
+fi
+if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
+    /bin/mv config.txt $OUTPUT_CONFIG_FILE
+fi
+##
+#