Mercurial > repos > pjbriggs > pal_finder
diff pal_finder_wrapper.sh @ 8:4e625d3672ba draft
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
author | pjbriggs |
---|---|
date | Wed, 16 May 2018 07:39:16 -0400 |
parents | 5e133b7b79a6 |
children |
line wrap: on
line diff
--- a/pal_finder_wrapper.sh Mon Mar 19 06:33:32 2018 -0400 +++ b/pal_finder_wrapper.sh Wed May 16 07:39:16 2018 -0400 @@ -26,11 +26,13 @@ # --primer-opt-tm VALUE: optimum melting temperature (Celsius) # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers # --output_config_file FNAME: write a copy of the config.txt file to FNAME -# --filter_microsats FNAME: write output of filter options FNAME +# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME +# --filter_microsats FNAME: write output of filter options to FNAME # -assembly FNAME: run the 'assembly' filter option and write to FNAME # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -53,6 +55,9 @@ # Maximum size reporting log file contents MAX_LINES=500 # +# Get helper functions +. $(dirname $0)/pal_finder_wrapper_utils.sh +# # Initialise locations of scripts, data and executables # # Set these in the environment to overide at execution time @@ -63,31 +68,18 @@ # Filter script is in the same directory as this script PALFINDER_FILTER=$(dirname $0)/pal_filter.py if [ ! -f $PALFINDER_FILTER ] ; then - echo No $PALFINDER_FILTER script >&2 - exit 1 + fatal No $PALFINDER_FILTER script fi # # Check that we have all the components -function have_program() { - local program=$1 - local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") - if [ -z "$got_program" ] ; then - echo yes - else - echo no - fi -} if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then - echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 - exit 1 + fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" fi if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then - echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 - exit 1 + fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" fi if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then - echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 - exit 1 + fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" fi # # Initialise parameters used in the config.txt file @@ -113,12 +105,14 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" - exits + fatal "Bad command line" fi if [ "$1" == "--454" ] ; then PLATFORM="454" @@ -212,6 +206,10 @@ shift OUTPUT_CONFIG_FILE=$1 ;; + --bad_primer_ranges) + shift + BAD_PRIMER_RANGES=$1 + ;; --filter_microsats) shift FILTERED_MICROSATS=$1 @@ -224,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -235,16 +237,33 @@ # Check that primer3_core is available got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` if [ -z "$got_primer3" ] ; then - echo ERROR primer3_core not found >&2 - exit 1 + fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input if [ $FASTQ_R1 == $FASTQ_R2 ] ; then - echo ERROR R1 and R2 fastqs are the same file >&2 - exit 1 + fatal ERROR R1 and R2 fastqs are the same file fi ln -s $FASTQ_R1 ln -s $FASTQ_R2 @@ -259,22 +278,19 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . # # Update the config.txt file with new values -function set_config_value() { - local key=$1 - local value=$2 - local config_txt=$3 - if [ -z "$value" ] ; then - echo "No value for $key, left as default" - else - echo Setting "$key" to "$value" - sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt - fi -} # Input files set_config_value platform $PLATFORM config.txt if [ "$PLATFORM" == "Illumina" ] ; then @@ -299,6 +315,7 @@ # Primer3 settings set_config_value primer3input Output/pr3in.txt config.txt set_config_value primer3output Output/pr3out.txt config.txt +set_config_value keepPrimer3files 1 config.txt set_config_value primer3executable $PRIMER3_CORE_EXE config.txt set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt @@ -327,18 +344,53 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then - echo ERROR pal_finder failed to complete successfully >&2 +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) + fatal ERROR pal_finder failed to complete successfully +fi +echo "### pal_finder finished ###" +# +# Check for errors in pal_finder output +echo "### Checking for errors ###" +if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then + echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) + if [ -z "$BAD_PRIMER_RANGES" ] ; then + # No output file so report to stderr + cat <<EOF + +Pal_finder generated bad ranges for the following read IDs: + +EOF + cat bad_primer_ranges.txt + cat <<EOF + +This error can occur when input data contains short R1 reads and has +has not been properly trimmed and filtered. + +EOF + else + # Move the bad ranges to the specified file + echo "### Writing read IDs with bad primer ranges ###" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" + fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output @@ -362,11 +414,9 @@ fi tail -$MAX_LINES pal_filter.log if [ $? -ne 0 ] ; then - echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 - exit 1 + fatal $PALFINDER_FILTER exited with non-zero status elif [ ! -f PAL_summary.filtered ] ; then - echo ERROR no output from $PALFINDER_FILTER >&2 - exit 1 + fatal no output from $PALFINDER_FILTER fi fi # @@ -386,8 +436,7 @@ if [ -f "$assembly" ] ; then /bin/mv $assembly "$OUTPUT_ASSEMBLY" else - echo ERROR no assembly output found >&2 - exit 1 + fatal no assembly output found fi fi if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then