changeset 8:4e625d3672ba draft

Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
author pjbriggs
date Wed, 16 May 2018 07:39:16 -0400
parents 5e133b7b79a6
children 52dbe2089d14
files README.rst fastq_subset.py pal_finder_macros.xml pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh test-data/454_microsats.out.re_match~ test-data/illuminaPE_bad_primer_read_ids.out test-data/illuminaPE_filtered_microsats.out.re_match~ test-data/illuminaPE_filtered_microsats_occurrences.out.re_match~ test-data/illuminaPE_filtered_microsats_primers.out.re_match~ test-data/illuminaPE_filtered_microsats_rankmotifs.out.re_match~ test-data/illuminaPE_microsat_types.out.re_match~ test-data/illuminaPE_microsats.out.re_match~ test-data/illuminaPE_microsats_bad_ranges.out.re_match test-data/illuminaPE_microsats_subset.out.re_match test-data/illuminaPE_r1_bad_ranges.fq test-data/illuminaPE_r1_no_microsats.fq test-data/illuminaPE_r2_bad_ranges.fq test-data/illuminaPE_r2_no_microsats.fq
diffstat 20 files changed, 551 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Mon Mar 19 06:33:32 2018 -0400
+++ b/README.rst	Wed May 16 07:39:16 2018 -0400
@@ -61,6 +61,11 @@
 Version    Changes
 ---------- ----------------------------------------------------------------------
 
+0.02.04.7  - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad
+             ranges being supplied to ``primer3_core`` for some reads via
+             ``PRIMER_PRODUCT_SIZE_RANGE`` (and enable 'bad' reads to be output
+             to a dataset); add new option to use a random subset of reads for
+             microsatellite detection.
 0.02.04.6  - Update to get dependencies using ``conda`` when installed from the
              toolshed (this removes the explicit dependency on Perl 5.16
              introduced in 0.02.04.2, as a result the outputs from the tool are
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_subset.py	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+import argparse
+import random
+from Bio.SeqIO.QualityIO import FastqGeneralIterator
+
+def count_reads(fastq):
+    """
+    Count number of reads in a Fastq file
+    """
+    n = 0
+    with open(fastq,'r') as fq:
+        while True:
+            buf = fq.read()
+            n += buf.count('\n')
+            if buf == "": break
+    return n/4
+
+def fastq_subset(fastq_in,fastq_out,indices):
+    """
+    Output a subset of reads from a Fastq file
+
+    The reads to output are specifed by a list
+    of integer indices; only reads at those
+    positions in the input file will be written
+    to the output.
+    """
+    with open(fastq_in,'r') as fq_in:
+        fq_out = open(fastq_out,'w')
+        i = 0
+        for title,seq,qual in FastqGeneralIterator(fq_in):
+            if i in indices:
+                fq_out.write("@%s\n%s\n+\n%s\n" % (title,
+                                                   seq,
+                                                   qual))
+            i += 1
+        fq_out.close()
+
+if __name__ == "__main__":
+
+    p = argparse.ArgumentParser()
+    p.add_argument("fastq_r1")
+    p.add_argument("fastq_r2")
+    p.add_argument("-n",
+                   dest="subset_size",
+                   default=None,
+                   help="subset size")
+    p.add_argument("-s",
+                   dest="seed",
+                   type=int,
+                   default=None,
+                   help="seed for random number generator")
+    args = p.parse_args()
+
+    print "Processing fastq pair:"
+    print "\t%s" % args.fastq_r1
+    print "\t%s" % args.fastq_r2
+
+    nreads = count_reads(args.fastq_r1)
+    print "Counted %d reads in %s" % (nreads,args.fastq_r1)
+
+    if args.subset_size is not None:
+        subset_size = float(args.subset_size)
+        if subset_size < 1.0:
+            subset_size = int(nreads*subset_size)
+        else:
+            subset_size = int(subset_size)
+        print "Extracting subset of reads: %s" % subset_size
+        if args.seed is not None:
+            print "Random number generator seed: %d" % args.seed
+            random.seed(args.seed)
+        subset = random.sample(xrange(nreads),subset_size)
+        fastq_subset(args.fastq_r1,"subset_r1.fq",subset)
+        fastq_subset(args.fastq_r2,"subset_r2.fq",subset)
--- a/pal_finder_macros.xml	Mon Mar 19 06:33:32 2018 -0400
+++ b/pal_finder_macros.xml	Wed May 16 07:39:16 2018 -0400
@@ -14,6 +14,7 @@
 	<has_line line="readsWithMicrosat:&#009;13" />
 	<has_line line="totalBases:&#009;2320" />
 	<has_line line="totalReads:&#009;20&#009;(2 x 10)" />
+	<has_line line="readsWithBadRanges:&#009;0" />
 	<has_line line="Microsat Type&#009;monomer length&#009;total loci&#009;loci w/ primers&#009;reads with loci&#009;total bases&#009;extended&#009;extended w/ primers&#009;spanning&#009;spanning w/ primers" />
 	<has_line_matching expression="(AC|TG)\t2\t7\t4\t7\t116\t0?\t0?\t0?\t0?" />
 	<has_line_matching expression="(AT|CG)\t2\t8\t0\t6\t106\t0?\t0?\t0?\t0?" />
@@ -21,6 +22,44 @@
       </assert_contents>
     </output>
   </xml>
+  <xml name="output_illumina_microsat_subset_summary">
+    <output name="output_microsat_summary">
+      <assert_contents>
+	<has_line line="allExtended:&#009;0" />
+	<has_line line="allSpan:&#009;0" />
+	<has_line line="broken:&#009;0" />
+	<has_line line="compound:&#009;2" />
+	<has_line line="readsWithMicrosat:&#009;7" />
+	<has_line line="totalBases:&#009;1160" />
+	<has_line line="totalReads:&#009;10&#009;(2 x 5)" />
+	<has_line line="Microsat Type&#009;monomer length&#009;total loci&#009;loci w/ primers&#009;reads with loci&#009;total bases&#009;extended&#009;extended w/ primers&#009;spanning&#009;spanning w/ primers" />
+	<has_line_matching expression="(AC|TG)\t2\t6\t3\t6\t104\t0?\t0?\t0?\t0?" />
+	<has_line_matching expression="(AT|CG)\t2\t3\t0\t3\t38\t0?\t0?\t0?\t0?" />
+	<has_line_matching expression="(AG|TC)\t2\t0\t0\t0\t0\t0?\t0?\t0?\t0?" />
+      </assert_contents>
+    </output>
+  </xml>
+  <xml name="output_illumina_microsat_summary_bad_ranges">
+    <output name="output_microsat_summary">
+      <assert_contents>
+	<has_line line="allExtended:&#009;2" />
+	<has_line line="allSpan:&#009;0" />
+	<has_line line="broken:&#009;0" />
+	<has_line line="compound:&#009;4" />
+	<has_line line="readsWithMicrosat:&#009;12" />
+	<has_line line="totalBases:&#009;2231" />
+	<has_line line="totalReads:&#009;12&#009;(2 x 6)" />
+	<has_line line="readsWithBadRanges:&#009;2" />
+	<has_line line="Microsat Type&#009;monomer length&#009;total loci&#009;loci w/ primers&#009;reads with loci&#009;total bases&#009;extended&#009;extended w/ primers&#009;spanning&#009;spanning w/ primers" />
+	<!--
+	    I'd like to do a basic check on the remainder of the file but
+	    there are too many lines for the 'assert_contents' method (and
+	    the tag doesn't provide the functionality to do a simple line
+	    count).
+	-->
+      </assert_contents>
+    </output>
+  </xml>
   <xml name="output_454_microsat_summary">
     <output name="output_microsat_summary">
       <assert_contents>
--- a/pal_finder_wrapper.sh	Mon Mar 19 06:33:32 2018 -0400
+++ b/pal_finder_wrapper.sh	Wed May 16 07:39:16 2018 -0400
@@ -26,11 +26,13 @@
 # --primer-opt-tm VALUE: optimum melting temperature (Celsius)
 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
 # --output_config_file FNAME: write a copy of the config.txt file to FNAME
-# --filter_microsats FNAME: write output of filter options FNAME
+# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME
+# --filter_microsats FNAME: write output of filter options to FNAME
 # -assembly FNAME: run the 'assembly' filter option and write to FNAME
 # -primers: run the 'primers' filter option
 # -occurrences: run the 'occurrences' filter option
 # -rankmotifs: run the 'rankmotifs' filter option
+# --subset N: use a subset of reads of size N
 #
 # pal_finder is available from http://sourceforge.net/projects/palfinder/
 #
@@ -53,6 +55,9 @@
 # Maximum size reporting log file contents
 MAX_LINES=500
 #
+# Get helper functions
+. $(dirname $0)/pal_finder_wrapper_utils.sh
+#
 # Initialise locations of scripts, data and executables
 #
 # Set these in the environment to overide at execution time
@@ -63,31 +68,18 @@
 # Filter script is in the same directory as this script
 PALFINDER_FILTER=$(dirname $0)/pal_filter.py
 if [ ! -f $PALFINDER_FILTER ] ; then
-    echo No $PALFINDER_FILTER script >&2
-    exit 1
+    fatal No $PALFINDER_FILTER script
 fi
 #
 # Check that we have all the components
-function have_program() {
-    local program=$1
-    local got_program=$(which $program 2>&1 | grep "no $(basename $program) in")
-    if [ -z "$got_program" ] ; then
-	echo yes
-    else
-	echo no
-    fi	
-}
 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then
-    echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
-    exit 1
+    fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found"
 fi
 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
-    echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
-    exit 1
+    fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}"
 fi
 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
-    echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
-    exit 1
+    fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}"
 fi
 #
 # Initialise parameters used in the config.txt file
@@ -113,12 +105,14 @@
 OUTPUT_ASSEMBLY=
 FILTERED_MICROSATS=
 FILTER_OPTIONS=
+SUBSET=
+RANDOM_SEED=568765
 #
 # Collect command line arguments
 if [ $# -lt 2 ] ; then
   echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
   echo "       $0 --454    FASTA    MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
-  exits
+  fatal "Bad command line"
 fi
 if [ "$1" == "--454" ] ; then
     PLATFORM="454"
@@ -212,6 +206,10 @@
 	    shift
 	    OUTPUT_CONFIG_FILE=$1
 	    ;;
+	--bad_primer_ranges)
+	    shift
+	    BAD_PRIMER_RANGES=$1
+	    ;;
 	--filter_microsats)
 	    shift
 	    FILTERED_MICROSATS=$1
@@ -224,6 +222,10 @@
 	    shift
 	    OUTPUT_ASSEMBLY=$1
 	    ;;
+	--subset)
+	    shift
+	    SUBSET=$1
+	    ;;
 	*)
 	    echo Unknown option: $1 >&2
 	    exit 1
@@ -235,16 +237,33 @@
 # Check that primer3_core is available
 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"`
 if [ -z "$got_primer3" ] ; then
-  echo ERROR primer3_core not found >&2
-  exit 1
+  fatal "primer3_core not found"
 fi
 #
+# Check the n-mers specification
+if [ $MIN_6_MER_REPS -ne 0 ] ; then
+    if [ $MIN_5_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero"
+    fi
+fi
+if [ $MIN_5_MER_REPS -ne 0 ] ; then
+    if [ $MIN_4_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero"
+    fi
+fi
+if [ $MIN_4_MER_REPS -ne 0 ] ; then
+    if [ $MIN_3_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero"
+    fi
+fi
+if [ $MIN_2_MER_REPS -eq 0 ] ; then
+    fatal "Minimum number of 2-mer repeats cannot be zero"
+fi
 # Set up the working dir
 if [ "$PLATFORM" == "Illumina" ] ; then
     # Paired end Illumina data as input
     if [ $FASTQ_R1 == $FASTQ_R2 ] ; then
-	echo ERROR R1 and R2 fastqs are the same file >&2
-	exit 1
+	fatal ERROR R1 and R2 fastqs are the same file
     fi
     ln -s $FASTQ_R1
     ln -s $FASTQ_R2
@@ -259,22 +278,19 @@
 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY)
 mkdir Output
 #
+# Use a subset of reads
+if [ ! -z "$SUBSET" ] ; then
+    echo "### Extracting subset of reads ###"
+    $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2
+    fastq_r1="subset_r1.fq"
+    fastq_r2="subset_r2.fq"
+fi
+#
 # Copy in the default config.txt file
 echo "### Creating config.txt file for pal_finder run ###"
 /bin/cp $PALFINDER_DATA_DIR/config.txt .
 #
 # Update the config.txt file with new values
-function set_config_value() {
-    local key=$1
-    local value=$2
-    local config_txt=$3
-    if [ -z "$value" ] ; then
-	echo "No value for $key, left as default"
-    else
-	echo Setting "$key" to "$value"
-	sed -i 's,^'"$key"' .*,'"$key"'  '"$value"',' $config_txt
-    fi
-}
 # Input files
 set_config_value platform $PLATFORM config.txt
 if [ "$PLATFORM" == "Illumina" ] ; then
@@ -299,6 +315,7 @@
 # Primer3 settings
 set_config_value primer3input Output/pr3in.txt config.txt
 set_config_value primer3output Output/pr3out.txt config.txt
+set_config_value keepPrimer3files 1 config.txt
 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt
 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt
 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
@@ -327,18 +344,53 @@
 fi
 tail -$MAX_LINES pal_finder.log
 #
-# Check that log ends with "Done!!" message
-if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
-    echo ERROR pal_finder failed to complete successfully >&2
+# Check for success/failure
+if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then
+    # No microsatellites found
+    fatal ERROR pal_finder failed to locate any microsatellites
     exit 1
+elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
+    # Log doesn't end with "Done!!" (indicates failure)
+    fatal ERROR pal_finder failed to complete successfully
+fi
+echo "### pal_finder finished ###"
+#
+# Check for errors in pal_finder output
+echo "### Checking for errors ###"
+if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then
+    echo WARNING primer3 terminated prematurely due to bad product size ranges
+    $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt)
+    N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l)
+    if [ -z "$BAD_PRIMER_RANGES" ] ; then
+	# No output file so report to stderr
+	cat <<EOF
+
+Pal_finder generated bad ranges for the following read IDs:
+
+EOF
+	cat bad_primer_ranges.txt
+	cat <<EOF
+
+This error can occur when input data contains short R1 reads and has
+has not been properly trimmed and filtered.
+
+EOF
+    else
+	# Move the bad ranges to the specified file
+	echo "### Writing read IDs with bad primer ranges ###"
+	/bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES"
+    fi
+else
+    N_BAD_PRIMERS=0
 fi
 #
 # Sort microsat_summary output
 echo "### Sorting microsat summary output ###"
 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted
+echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted
 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted
 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted
-tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted
+tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted
 mv microsat_summary.sorted Output/microsat_summary.txt
 #
 # Sort PAL_summary output
@@ -362,11 +414,9 @@
     fi
     tail -$MAX_LINES pal_filter.log
     if [ $? -ne 0 ] ; then
-	echo ERROR $PALFINDER_FILTER exited with non-zero status >&2
-	exit 1
+	fatal $PALFINDER_FILTER exited with non-zero status
     elif [ ! -f PAL_summary.filtered ] ; then
-	echo ERROR no output from $PALFINDER_FILTER >&2
-	exit 1
+	fatal no output from $PALFINDER_FILTER
     fi
 fi
 #
@@ -386,8 +436,7 @@
     if [ -f "$assembly" ] ; then
 	/bin/mv $assembly "$OUTPUT_ASSEMBLY"
     else
-	echo ERROR no assembly output found >&2
-	exit 1
+	fatal no assembly output found
     fi
 fi
 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
--- a/pal_finder_wrapper.xml	Mon Mar 19 06:33:32 2018 -0400
+++ b/pal_finder_wrapper.xml	Wed May 16 07:39:16 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.6">
+<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.7">
   <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
   <macros>
     <import>pal_finder_macros.xml</import>
@@ -9,7 +9,7 @@
     <requirement type="package" version="1.65">biopython</requirement>
     <requirement type="package" version="2.8.1">pandaseq</requirement>
   </requirements>
-  <command><![CDATA[
+  <command detect_errors="exit_code"><![CDATA[
   @CONDA_PAL_FINDER_SCRIPT_DIR@ &&
   @CONDA_PAL_FINDER_DATA_DIR@ &&
   bash $__tool_directory__/pal_finder_wrapper.sh
@@ -26,6 +26,9 @@
     --454 "$platform.input_fasta"
   #end if
   $output_microsat_summary $output_pal_summary
+  #if $report_bad_primer_ranges
+    --bad_primer_ranges "$output_bad_primer_read_ids"
+  #end if
   #if $keep_config_file
     --output_config_file "$output_config_file"
   #end if
@@ -61,6 +64,10 @@
     #if str( $platform.assembly ) == '-assembly'
       $platform.assembly "$output_assembly"
     #end if
+    #set $use_all_reads = $platform.subset_conditional.use_all_reads
+    #if str( $use_all_reads ) != "yes"
+      --subset "$platform.subset_conditional.subset"
+    #end if
   #end if
   ]]></command>
   <inputs>
@@ -88,6 +95,13 @@
 		   label="Select FASTQ dataset collection with R1/R2 pair" />
 	  </when>
 	</conditional>
+	<conditional name="subset_conditional">
+	  <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" />
+	  <when value="no">
+	    <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 0.5 to select 50% of reads)" />
+	  </when>
+	  <when value="yes" />
+	</conditional>
 	<param name="filters" type="select" display="checkboxes"
 	       multiple="True" label="Filters to apply to the pal_finder results"
 	       help="Apply none, one or more filters to refine results">
@@ -103,7 +117,7 @@
 	<param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />
       </when>
     </conditional>
-    <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" />
+    <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" />
     <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
     <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
     <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
@@ -155,7 +169,9 @@
 	       label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)"
 	       help="Temperature should be in degrees Celsius" />
       </when>
+      <when value="default" />
     </conditional>
+    <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " />
     <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"
 	   label="Output the config file to the history"
 	   help="Can be used to run pal_finder outside of Galaxy" />
@@ -169,6 +185,9 @@
     <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly">
       <filter>platform['assembly'] is True</filter>
     </data>
+    <data name="output_bad_primer_read_ids" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: read IDs generating bad primer ranges">
+      <filter>report_bad_primer_ranges is True</filter>
+    </data>
     <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file">
       <filter>keep_config_file is True</filter>
     </data>
@@ -247,6 +266,77 @@
       <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />
       <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" />
     </test>
+    <!-- Test with Illumina input using subset of reads -->
+    <test>
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="assembly" value="false" />
+      <param name="use_all_reads" value="no" />
+      <param name="subset" value="0.5" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <expand macro="output_illumina_microsat_subset_summary" />
+      <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" />
+    </test>
+    <!-- Test with Illumina input filter that doesn't find any
+	 microsatellites -->
+    <test expect_failure="true">
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="assembly" value="false" />
+      <param name="min_2mer_repeats" value="8" />
+      <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
+      <assert_stderr>
+	<has_text text="pal_finder failed to locate any microsatellites" />
+      </assert_stderr>
+    </test>
+    <!-- Test with Illumina input generating bad ranges -->
+    <test>
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="assembly" value="false" />
+      <param name="min_2mer_repeats" value="8" />
+      <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" />
+      <param name="min_2mer_repeats" value="8" />
+      <param name="min_3mer_repeats" value="8" />
+      <param name="min_4mer_repeats" value="8" />
+      <param name="min_5mer_repeats" value="8" />
+      <param name="min_6mer_repeats" value="8" />
+      <param name="primer_options" value="custom" />
+      <param name="primer_opt_size" value="25" />
+      <param name="primer_min_size" value="21" />
+      <param name="primer_max_size" value="30" />
+      <param name="primer_min_gc" value="40.0" />
+      <param name="primer_max_gc" value="60.0" />
+      <param name="primer_gc_clamp" value="3" />
+      <param name="primer_max_end_gc" value="5" />
+      <param name="primer_min_tm" value="60.0" />
+      <param name="primer_max_tm" value="80.0" />
+      <param name="primer_opt_tm" value="68.0" />
+      <param name="primer_pair_max_diff_tm" value="3.0" />
+      <param name="report_bad_primer_ranges" value="true" />
+      <expand macro="output_illumina_microsat_summary_bad_ranges" />
+      <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" />
+      <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" />
+    </test>
+    <!-- Test with bad n-mers specified -->
+    <test expect_failure="true">
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="assembly" value="false" />
+      <param name="min_2mer_repeats" value="8" />
+      <param name="min_3mer_repeats" value="8" />
+      <param name="min_4mer_repeats" value="0" />
+      <param name="min_5mer_repeats" value="8" />
+      <param name="min_6mer_repeats" value="8" />
+      <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
+      <assert_stderr>
+	<has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" />
+      </assert_stderr>
+    </test>
     <!-- Test with 454 input -->
     <test>
       <param name="platform_type" value="454" />
@@ -282,6 +372,52 @@
 
 .. class:: infomark
 
+**Known issues**
+
+.. class:: warning
+
+**Low number of reads used for microsatellite detection/bad primer product size ranges**
+
+For some datasets pal_finder may generate 'bad' product size ranges (where the
+lower limit exceeds the upper limit) for one or more reads, for input into
+primer3_core. In these cases primer3_core will terminate prematurely, which can
+result in a substantially lower number of reads being used for microsatellite
+detection and potentially sub-optimal primer design.
+
+The number of reads generating bad size ranges is reported in the
+*Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally
+the reported value should be zero.
+
+The conditions which cause this issue within pal_finder are still unclear,
+however we believe it to be associated with short or low quality reads. If this
+problem affects your data then:
+
+* Ensure that the input data are sufficiently trimmed and filtered (using
+  e.g. the Trimmomatic tool) before rerunning pal_finder.
+
+* A list of read IDs for which pal_finder generates bad product size ranges can
+  be output by turning on *Output IDs for input reads which generate bad primer
+  product size ranges*. This outputs an additional dataset with a list of read
+  IDs which can be used to remove read pairs from the input Fastq files (using
+  e.g. the *Filter sequences by ID* tool) before rerunning pal_finder.
+
+.. class:: warning
+
+**Pal_finder takes a long time to run for large input datasets**
+
+pal_finder was originally developed using MiSeq data, and is not optimised for
+working with the larger Fastqs that are output from other platforms such as
+HiSeq and NextSeq. As a consequence pal_finder may take a very long time to
+complete when operating on larger datasets.
+
+If this is a problem then the tool can be run using a subset of the input reads
+by unchecking the *Use all reads...* option and entering either an integer number
+of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads).
+
+-------------
+
+.. class:: infomark
+
 **Credits**
 
 This Galaxy tool has been developed by Peter Briggs within the Bioinformatics Core
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal_finder_wrapper_utils.sh	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Helper functions for the pal_finder_wrapper.sh script
+#
+# Utility function for terminating on fatal error
+function fatal() {
+    # Report all arguments (joined by spaces) on stderr with a
+    # FATAL prefix, then terminate the script with exit status 1
+    local message="FATAL $*"
+    echo "$message" >&2
+    exit 1
+}
+#
+# Check that specified program is available
+function have_program() {
+    # Echo 'yes' if the named program can be found by the
+    # shell, 'no' otherwise
+    #
+    # Arguments:
+    #   $1: program name (or path) to look for
+    #
+    # 'command -v' reports availability through its exit status,
+    # so the result doesn't depend on the wording of the error
+    # message printed by a particular 'which' implementation
+    local program=$1
+    if command -v "$program" >/dev/null 2>&1 ; then
+        echo yes
+    else
+        echo no
+    fi
+}
+#
+# Set the value for a parameter in the pal_finder config file
+function set_config_value() {
+    # Update a parameter in a pal_finder config file in-place
+    #
+    # Arguments:
+    #   $1: name of the parameter (key) to update
+    #   $2: new value; if empty then the file is left untouched
+    #   $3: config file to edit
+    local key=$1
+    local value=$2
+    local config_txt=$3
+    if [ -n "$value" ] ; then
+       # Rewrite any line starting with '<key> ' as '<key>  <value>'
+       echo Setting "$key" to "$value"
+       sed -i 's,^'"$key"' .*,'"$key"'  '"$value"',' $config_txt
+       return
+    fi
+    echo "No value for $key, left as default"
+}
+#
+# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file
+function find_bad_primer_ranges() {
+    # Parses a pr3in.txt file from pal_finder and reports
+    # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has
+    # an upper limit which is smaller than the lower limit
+    #
+    # Arguments:
+    #   $1: primer3 input file (pr3in.txt) to check
+    #   $2: file to append the results to, one line per bad
+    #       read, as '<read id><TAB>(<size ranges>)'
+    local pr3in=$1
+    local outfile=$2
+    local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)"
+    local line size_range seq_id bad_range range lower upper
+    # Make sure the output file exists even if nothing is
+    # reported, so that it can always be read afterwards
+    : >>"$outfile"
+    # Join each consecutive pair of SEQUENCE_ID and
+    # PRIMER_PRODUCT_SIZE_RANGE lines with a '*' and read the
+    # result a line at a time: this keeps lines holding any
+    # number of space-separated ranges intact, and avoids
+    # filename expansion being applied to the '*'
+    grep -E "$pattern" "$pr3in" | sed 'N;s/\n/*/' | while IFS= read -r line
+    do
+        size_range=
+        seq_id=
+        case "$line" in
+            SEQUENCE_ID*)
+                # Lines look like:
+                # SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535*PRIMER_PRODUCT_SIZE_RANGE=194-329 59-194
+                size_range=$(echo "$line" | cut -d'*' -f2 | cut -d'=' -f2)
+                seq_id=$(echo "$line" | cut -d'*' -f1 | cut -d'=' -f2)
+                ;;
+            PRIMER_PRODUCT_SIZE_RANGE*)
+                # Lines look like:
+                # PRIMER_PRODUCT_SIZE_RANGE=194-329 59-194*SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535
+                size_range=$(echo "$line" | cut -d'*' -f1 | cut -d'=' -f2)
+                seq_id=$(echo "$line" | cut -d'*' -f2 | cut -d'=' -f2)
+                ;;
+        esac
+        # Strip the two leading '(...)' groups to leave the read id
+        seq_id=$(echo "$seq_id" | cut -d')' -f3)
+        # Check the upper and lower limits in each range
+        # to see if it's okay
+        bad_range=
+        for range in $size_range ; do
+            lower=$(echo "$range" | cut -d'-' -f1)
+            upper=$(echo "$range" | cut -d'-' -f2)
+            if [ "$lower" -gt "$upper" ] ; then
+                bad_range=yes
+                break
+            fi
+        done
+        # Report if the range is wrong
+        if [ ! -z "$bad_range" ] ; then
+            echo "${seq_id}"$'\t'"(${size_range})" >>"$outfile"
+        fi
+    done
+}
--- a/test-data/454_microsats.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-SequenceID\	Repeat\ Motif\ Size\	Repeat\ Motif\	Number\ Tandem\ Repeats\	Primer\ Designed\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Total\ Repeats\ In\ Amplicon\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs
-FW1N29Q04EP35X\	2\	AC\	18\	0\	\	\	\	\	\	\	\	\	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_bad_primer_read_ids.out	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,1 @@
+M00879:99:000000000-AH9KG:1:2107:14372:5471	(74-71 77-74)
--- a/test-data/illuminaPE_filtered_microsats.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\	R1\_Sequence\_ID\	R1\_Sequence\	R2\_Sequence\_ID\	R2\_Sequence
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	AC\(12\)\ \	12\	\	\	1\	test\_3\	AAGTACAGTGGGGAGGCTGG\	test\_6\	TTTTCTACACAGCTCAAGTAGCCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\	AC\(12\)\ \	12\	\	\	1\	test\_7\	GCAGTAAACAAAGGCAAAGGG\	test\_4\	CCTGGGCAGAGGTGTTCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\	AG\(14\)\ \	14\	\	\	1\	test\_5\	TTCTCCCACTATATTTTGCATTGG\	test\_1\	TCCAGACTGAAGCTACCCTGG\	AG\(14\)\ \	14\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
--- a/test-data/illuminaPE_filtered_microsats_occurrences.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\	R1\_Sequence\_ID\	R1\_Sequence\	R2\_Sequence\_ID\	R2\_Sequence
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\	AG\(14\)\ \	14\	\	\	1\	test\_7\	TTCTCCCACTATATTTTGCATTGG\	test\_6\	TCCAGACTGAAGCTACCCTGG\	AG\(14\)\ \	14\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\	AC\(12\)\ \	12\	\	\	1\	test\_5\	GCAGTAAACAAAGGCAAAGGG\	test\_2\	CCTGGGCAGAGGTGTTCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	AC\(12\)\ \	12\	\	\	1\	test\_8\	AAGTACAGTGGGGAGGCTGG\	test\_4\	TTTTCTACACAGCTCAAGTAGCCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
--- a/test-data/illuminaPE_filtered_microsats_primers.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\	R1\_Sequence\_ID\	R1\_Sequence\	R2\_Sequence\_ID\	R2\_Sequence
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\	AG\(14\)\ \	14\	\	\	1\	test\_7\	TTCTCCCACTATATTTTGCATTGG\	test\_2\	TCCAGACTGAAGCTACCCTGG\	AG\(14\)\ \	14\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\	TG\(12\)\ \	12\	\	\	1\	test\_5\	GCAGTAAACAAAGGCAAAGGG\	test\_3\	CCTGGGCAGAGGTGTTCC\	TG\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	TG\(12\)\ \	12\	\	\	1\	test\_4\	AAGTACAGTGGGGAGGCTGG\	test\_6\	TTTTCTACACAGCTCAAGTAGCCC\	TG\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\	TG\(12\)\ TG\(12\)\ \	24\	\	\	1\	test\_8\	TCTTTATCTAAACACATCCTGAAATACC\	test\_1\	AAACGCAATTATTTTGAGATGTCC\	TG\(12\)\ TG\(12\)\ \	24\	1\	\	\	1\	2\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ 1\:N\:0\:TCCTGA\	TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ 2\:N\:0\:TCCTGA\	TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC
--- a/test-data/illuminaPE_filtered_microsats_rankmotifs.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\	R1\_Sequence\_ID\	R1\_Sequence\	R2\_Sequence\_ID\	R2\_Sequence
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	AC\(12\)\ \	12\	\	\	1\	test\_3\	AAGTACAGTGGGGAGGCTGG\	test\_4\	TTTTCTACACAGCTCAAGTAGCCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\	AT\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ 1\:N\:0\:TCCTGA\	TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ 2\:N\:0\:TCCTGA\	TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\	TC\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ 1\:N\:0\:TCCTGA\	TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ 2\:N\:0\:TCCTGA\	TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\	AT\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ 1\:N\:0\:TCCTGA\	TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ 2\:N\:0\:TCCTGA\	TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\	AC\(12\)\ \	12\	\	\	1\	test\_6\	GCAGTAAACAAAGGCAAAGGG\	test\_1\	CCTGGGCAGAGGTGTTCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\	AC\(36\)\ \	36\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ 1\:N\:0\:TCCTGA\	TCGTAGCATGTGTATGCTTTGGGGTTTCATGCTGTTGATTCATAACTGCTGCTGGCTGTAGACTGAACCTTCTGGGTAGGAGGAATATGCTTAGACAAGCACACCAGTCAGCCCGA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ 2\:N\:0\:TCCTGA\	TCTGTGTGTGAGCACACACACACACACACACACACACACACACACACATGCAGGTACTTGCTCTGCCACCCCTGGCGGGCTGCGTGGTGTGCCTGACGACGTATTCTAATCCTACA
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\	TC\(14\)\ \	14\	\	\	1\	test\_7\	TTCTCCCACTATATTTTGCATTGG\	test\_2\	TCCAGACTGAAGCTACCCTGG\	TC\(14\)\ \	14\	1\	\	\	1\	1\	1\	1\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\	\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
--- a/test-data/illuminaPE_microsat_types.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-allExtended\:\	0
-allSpan\:\	0
-broken\:\	2
-compound\:\	2
-readsWithMicrosat\:\	13
-totalBases\:\	2320
-totalReads\:\	20\	\(2\ x\ 10\)
-
-
-Microsat\ Type\	monomer\ length\	total\ loci\	loci\ w\/\ primers\	reads\ with\ loci\	total\ bases\	extended\	extended\ w\/\ primers\	spanning\	spanning\ w\/\ primers
-AC\	2\	7\	4\	7\	116\	0\	0\	0\	0
-AG\	2\	0\	0\	0\	0\	0\	0\	0\	0
-AT\	2\	8\	0\	6\	106\	0\	0\	0\	0
-CG\	2\	0\	0\	0\	0\	0\	0\	0\	0
-TC\	2\	2\	1\	2\	26\	\	\	\	
--- a/test-data/illuminaPE_microsats.out.re_match~	Mon Mar 19 06:33:32 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\	TC\(14\)\ \	14\	\	\	1\	test\_7\	TTCTCCCACTATATTTTGCATTGG\	test\_2\	TCCAGACTGAAGCTACCCTGG\	TC\(14\)\ \	14\	1\	\	\	1\	1\	1\	1
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\	AC\(36\)\ \	36\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:19063\:1614\	AT\(14\)\ AT\(14\)\ AT\(14\)\ AT\(14\)\ \	56\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\	AC\(12\)\ \	12\	\	\	1\	test\_6\	GCAGTAAACAAAGGCAAAGGG\	test\_1\	CCTGGGCAGAGGTGTTCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\	AT\(14\)\ AC\(16\)\ AC\(16\)\ AT\(12\)\ \	58\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\	AT\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\	TC\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\	AT\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	AC\(12\)\ \	12\	\	\	1\	test\_3\	AAGTACAGTGGGGAGGCTGG\	test\_4\	TTTTCTACACAGCTCAAGTAGCCC\	AC\(12\)\ \	12\	1\	\	\	1\	1\	1\	1
-ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\	AC\(12\)\ AC\(12\)\ \	24\	\	\	1\	test\_8\	TCTTTATCTAAACACATCCTGAAATACC\	test\_5\	AAACGCAATTATTTTGAGATGTCC\	AC\(12\)\ AC\(12\)\ \	24\	1\	\	\	1\	2\	1\	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_microsats_bad_ranges.out.re_match	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,7 @@
+readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs
+M00879\:99\:000000000\-AH9KG\:1\:2107\:10006\:2535\	AT\(16\)\ AT\(16\)\ \	32\	AT\ \	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
+M00879\:99\:000000000\-AH9KG\:1\:2107\:10032\:7900\	.*\ \	164\	\	\	1\	test\_.*\	(CGAAAGATGCTATAGAAGCGATGGGG|TATCTATCTATCAATCCGCTCCCC)\	test\_.*\	(GGACATCGAGATAGAAAGGGGACCG|TGATTGGACATCGAGATAGAAAGGG)\	.*\ \	80\	1\	\	\	.*\	.*\	1\	1
+M00879\:99\:000000000\-AH9KG\:1\:2107\:10061\:6317\	.*\ \	76\	\	\	1\	test\_.*\	GAGAGAGTACATAGATATCTCACGGGGCG\	test\_.*\	GCAACGGCACAGATCTCTTCTACGG\	.*\ \	22\	1\	\	\	1\	1\	1\	1
+M00879\:99\:000000000\-AH9KG\:1\:2107\:10072\:8112\	.*\ \	44\	\	\	1\	test\_.*\	AGTTTGTTACAGGGCATGACAACGG\	test\_.*\	TCCTGTTATCTTCTTGTTGCTTGGC\	.*\ \	22\	1\	\	\	1\	1\	1\	1
+M00879\:99\:000000000\-AH9KG\:1\:2107\:10084\:6474\	.*\ \	100\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
+M00879\:99\:000000000\-AH9KG\:1\:2107\:14372\:5471\	.*\ \	68\	.*\ \	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_microsats_subset.out.re_match	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,6 @@
+readPairID\	Motifs\(bases\)\	Bases\ in\ all\ Motifs\	Possible\ Extended\	Possible\ Spanning\	Primers\ found\ \(1\=y\,0\=n\)\	F\ Primer\ Name\	Forward\ Primer\	R\ Primer\ Name\	Reverse\ Primer\	Amplicon\ Motifs\	Number\ motif\ bases\ in\ amplicon\	Primers\ on\ sep\ reads\	Extend\ with\ primers\	Spand\ with\ primers\	Occurances\ of\ Forward\ Primer\ in\ Reads\	Occurances\ of\ Reverse\ Primer\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\	Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs
+ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\	(AC|TG)\(36\)\ \	36\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
+ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\	AT\(14\)\ (AC|TG)\(16\)\ (AC|TG)\(16\)\ AT\(12\)\ \	58\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
+ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\	AT\(12\)\ \	12\	\	\	0\	\	\	\	\	\	\	\	\	\	\	\	\	
+ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\	(AC|TG)\(12\)\ \	12\	\	\	1\	test\_.*\	AAGTACAGTGGGGAGGCTGG\	test\_.*\	TTTTCTACACAGCTCAAGTAGCCC\	(AC|TG)\(12\)\ \	12\	1\	\	\	1\	1\	1\	1
+ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\	(AC|TG)\(12\)\ (AC|TG)\(12\)\ \	24\	\	\	1\	test\_.*\	TCTTTATCTAAACACATCCTGAAATACC\	test\_.*\	AAACGCAATTATTTTGAGATGTCC\	(AC|TG)\(12\)\ (AC|TG)\(12\)\ \	24\	1\	\	\	1\	2\	1\	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_r1_bad_ranges.fq	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,24 @@
+@M00879:99:000000000-AH9KG:1:2107:10006:2535 1:N:0:TAAGGCGA+TAGATCGC
+ATATATATATATATATGTAGTATAATCTCACTGGAAAGGAGACGAAAACGAGCAGCTCCGAGCTTTCGACTTTATTTCAAGTCATCTTCAGGGCAACTGACAAATTTTTGTGTAGCAATAGTATATAGACACCAGACGAGATTCCTGACCTCACATCTGGGAGG
++
+CCCCCFGGGGGGGGGGGGGGGFCGGGGGGFGCG@FFGCDGGGGGGGGEGGGDGGFDCFGGGCDGGGGGGGGGGGGGGGGDGFGFGFGFFGGG@FEGGDEFGGGGGEFGGGGGGGFGFGGGGGGAFE?9FGGGGGGGG@F7F7AFDGGFFGFFGGG9EE9D8?>8
+@M00879:99:000000000-AH9KG:1:2107:10032:7900 1:N:0:TAAGGCGA+TAGATCGC
+ATCTATGTATTTATCTATCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCCGCTCCCCTTTCTATCTCGATGTCCAATCATTATACACACACCTACACGAAAGATGCTATAGAAGCGATGGGGGACTATAGGTGTATAGCAACTCTATACATCAACCAGTCTCTGCGCTCGTCCCCCTGTCG
++
+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGFGGGCFGG@FGGEFGGDGGGGGGGGGGGGGGA9EFFC,@@F@FG8FGG9?EGG,,4:?@FFAGG@EFF<B,CFBEGGD:EFGD7F+>+,4,@,9E9,@ECEEEFC+8+4>B88;=E3,,2,@A68,7=@DDGED=A8=A8FGE+@DDD,++0@+=0+<3<>095?**
+@M00879:99:000000000-AH9KG:1:2107:10061:6317 1:N:0:TAAGGCGA+TAGATCGC
+TATATATATATATGAGAAAACTGGTGTCGGTGAGGACTCATCAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGTACATAGATATCTCACGGGGCGAGCTGCCGCTGCTGCTGCTGCCGCGGAGGGTGCAGAATAAGACGAGGAGGTGGAGAGAGGTGTGGAAAAAGTGCCGTAGAAGAGATCTGTGCCGTCGCACAGCTCTCGTGGTGCACCTATGGGAGAGGCGG
++
+CCCC@FFFFFGGFGGGGGGGG8EFG?FFGGEGGGFGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFG@CFFGGGGGGGGECGGGGGGGF,9,,C,E,?,,9,5<,,:=+78B+++@,,,+++6+,8,,A55A,,+++@+87:*3*@3D79;,,,451***>B4<**6,=***4=;8*+++*/*21+0**3//CEG86+A9:1<++2**)*)2)**+629*9*715:***00*:*755*0*.00
+@M00879:99:000000000-AH9KG:1:2107:10072:8112 1:N:0:TAAGGCGA+TAGATCGC
+GTGTGTGTGTGTGTGTGTGTGTTGGTTGTGAAAGTTATCTCAGCTTGTTATATGGAATGACGACTGTCAAGCTTACCAGTTTCTTACATGGGATGACAAAGGCTAGGCTTACCAACTTGTTACATGTAATGACATCGGTAAAGCTTATCAGTTTGTTACAGGGCATGACAACGGTCAAGCTTACCAGCATGATACATGTAGTGACGAAGGCCAAGCCACAAGAAGATAACAGGCAC
++
+CCCCCFGGGGFFFGGFFGFGGGGGGGGGGDFGGGCGFGGGG,FFGGFFFCDCDEFFCFGGFGCEGDCG<DGGFA@FCFCFDGFGCEFFGG,CEECFGFGFG,BAFFGGGGGGGAFEC,EFDGGGFGGGGGFGFGGGGEEFGBAFDFCGCAFGGGGEEFFGCFG+9EFGGGD88BECEGGG?EGGGG8,@EF,,@DDFA@3@:DA9CEEDE6AFGGF7D+5CEF591C:>7>CD*;F
+@M00879:99:000000000-AH9KG:1:2107:14372:5471 1:N:0:TAAGGCGA+TAGATCGC
+GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG
++
+CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
+@M00879:99:000000000-AH9KG:1:2107:10084:6474 1:N:0:TAAGGCGA+TAGATCGC
+CAATTACCTAAGTATCTGAATAGTCATTTCCCTATTAGGGGTGATCATAGTCGAACGGTAGCACTCCCGCTGGTCACGCTCTGGACCAGAGTTCAAATCTTGGCTGGTCGAGGTTTACGTGTTTTATATCTATCTATCTATCTATCTATCTATCTATCTATATATATATATATATACATATATAT
++
+CCCCCGGGGGGGFGGGGGGGGGGFGGGGGGGGGCFGGGGGGFGGGGGGGGGGGGGGG@FGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGFECCEEEFGGGGGGGG>FGG>C+BE,BF:FGFFGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGFDEEGGGGFGGGCFGGGGGGGGFDFDCFCD;E
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_r1_no_microsats.fq	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,32 @@
+@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA
+TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA
++
+IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8
+@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA
+TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG
++
+IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E
+@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA
+TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA
++
+DDDDDBDBDDB7?BBB5BB>1?<?88>DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD<D;D>>?<9DD;6DDC<DD;<88@#####
+@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA
+TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC
++
+####################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA
+TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA
++
+####################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA
+TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG
++
+HHHHHHHHHHHHHHHFHFGFHEFFDF92=@=>;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB
+@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA
+TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT
++
+HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@<CC>C=?9==@CECB=BBB8
+@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA
+TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC
++
+IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_r2_bad_ranges.fq	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,24 @@
+@M00879:99:000000000-AH9KG:1:2107:10006:2535 2:N:0:TAAGGCGA+TAGATCGC
+CCTCCCAGATGTGAGGTCAGGAATCTCGTCTGGTGTCTATATACTATTGCTACACAAAAATTTGTCAGTTGCCCTGAAGATGACTTGAAATAAAGTCGAAAGCTCGGAGCTGCTCGTTTTCGTCTCCTTTCCAGTGAGATTATACTACATATATATATATATAT
++
+CCCCCGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFFGGGGGGFGGGGGGGCEFFGGGGGFGGGGGGGGGGGGGGGFGFCEGGFGG8EFEFDFGEFGFFGGCFGGFAFF,9AFDGGGGGGGGGGGGGGEADDFGFEA
+@M00879:99:000000000-AH9KG:1:2107:10032:7900 2:N:0:TAAGGCGA+TAGATCGC
+CTACAGGGGGCCGAGCACAGAGACTGGTTGATGTATAGAGTTGCTATACTCCTATAGTCCCCCATCGCTTCTATAGCATCTTTCGTGTAGGTGTGTCTATAATGATTGGACATCGAGATAGAAAGGGGACCGGATTGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAGAGAGAGAGAGAGAGAGAGAGCGAGGAGATAGATAAATACATCGAT
++
+CCCCCGGGGDGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGCGFGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGFFGGGFGDGGGFGFGGGGGGGGGGEGFFGFFCGFGGGCFG9FFGGGGFCFGGGGGGGGG8DAFGFFGFGGDGDFGFCFFD72DAFF>?BCFFF5>9A>DFFFFFF*):494?>BFEE4<*9*@:=?*-(-
+@M00879:99:000000000-AH9KG:1:2107:10061:6317 2:N:0:TAAGGCGA+TAGATCGC
+GCCTCGGGACCTTGGCACGCCGCCTCTCCCATAGATGCACCATGAGATCCGTGCAACGGCACAGATCTCTTCTACGGAACTTTTGCCACACCTCACTTCACCTTCTTGCCTTATTCTGCACCACCTGCAGCAGCAGCAGCAGCAGCAGCTCGCCCCGAGTTACATCTATGTACTCTCTCCCTCTCTCTCTCTCTCTCTCTCTCCCTCTCTCTCTCCCTCCCTCTCCTCTCACCCCACACTCACACCCGC
++
+CCCCCFGGGGGGGFFGGGGGGGGGGGGGGGGGGFGGGFEGGGGEGFGGGGGGGGGGGEGGGGGGGGGCGFFGFGGFG@FGGFEF9FGGGCGGEFEFGGGDGG@FE?EEFGGGGG,EE,EFGGFGGFDG,@FFFFG8D8=E8>EEEGC=D=D6CEGC61C=8:ECFG8AA9<2CCC<C?C+=0==DAF9C7;;@0;0@09*@*9*)3;):/;.7.>*7):(54>3-(0(*(0*-(511(6(/(6)6/(,(
+@M00879:99:000000000-AH9KG:1:2107:10072:8112 2:N:0:TAAGGCGA+TAGATCGC
+GTTCCTGTTATCTTCTTGTTGCTTGGCCTTCGTCACTACATGTAACATGCTGGTAAGCTTGACCGTTGTCATTCCCTGTAACAAACTGATAAGCTTTACCGATGTCATTACATGTAACCAGTTGGTAAGCCTAGCCTTTGTCATCCCATGTAAGAAACTGGTAAGCTTGACAGTCGTCATTCCATATAACAAGCTGAGATAACTTTCACAACCAACACACACACACACACACACACCTGTCTCCTATAC
++
+CCCCCGGGGGGGGGGGGFFGFGGGG7FFGGGGGGGGGGGGGFGFFFAEGGGCFFG@AGGGC,EFGGGGGGGG@EEGGGGFGGFGGGFGGFGCFGGGGGGGGCEEGGCFEFGDEFGGFG,CFFFEGGGDG9EFFFGGGGFAFGGGGG84E=EFGGG;AGDDFFFGGFGG8=DAFGFG=D88FFG@9D@@FDD+;D56D?FFFFFFD=7*;2:)=855)=DF=>=AAD==@DDA)=@@5)):)3;9A***9
+@M00879:99:000000000-AH9KG:1:2107:14372:5471 2:N:0:TAAGGCGA+TAGATCGC
+CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC
++
+ACCCCFGE<FFFGGGGCFFFEF@EGGGGGGGGGGG
+@M00879:99:000000000-AH9KG:1:2107:10084:6474 2:N:0:TAAGGCGA+TAGATCGC
+ATATATATGTATATATATATATATATAGATAGATAGATAGATAGATAGATAGATAGATATAAAACACGTAAACCTCGACCAGCCAAGATTTGAACTCTGGTCCAGAGCGTGACCAGCGGGAGTGCTACCGTTCGACTATGATCACCCCTAATAGGGAACTGACTATTCAGATACTTAGGTAATTG
++
+CCCCCGGGGGGGGGGGGGGGGGGGCFGFGGGCGGGFGFGFGCFGGFGGGGGGGGGGGGGGGGGGGGFCFFFGGGG<FEFGGFCEFGGGGGGFFAGFGGCCFGGEDGFDGGGGGGGGGG7@FG=AEFGGGGDGC8EGGGGFFEFEGGGGGGGDCF8@FE+==AF9=FFGGFGDGFDA=?DDG+3?9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_r2_no_microsats.fq	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,32 @@
+@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA
+TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
++
+IIIIIHIIIIIHIIIIHIHIIIIIIIHIIIHIIIHIHGIIIIIIHEHHHHHHHHIIHIIHFHHEHHIGHHGHGIHGHHFGAGIEHHGEGHBFIFDBHHHGDFHBDBHGGFGD8EB@
+@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 2:N:0:TCCTGA
+TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA
++
+DDDDDDDDDDDD:D@D+DDDDDDDDDDDD6DDDDD>A@:5>@##########################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA
+TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
++
+B?/?################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA
+TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT
++
+HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBB<AECBBBC>BA>B<;BA@A@######
+@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA
+TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA
++
+HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@##########
+@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA
+TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT
++
+IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6
+@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA
+TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
++
+HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################
+@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA
+TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC
++
+IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB