# HG changeset patch
# User pjbriggs
# Date 1449233010 18000
# Node ID b6ccc7dd7b025f4ed22da97efb4a0c02db051dde
# Parent  771ebe02636fe4566d5e3ce759700e37e9e9c2bf
Version 0.02.04.3.

diff -r 771ebe02636f -r b6ccc7dd7b02 README.rst
--- a/README.rst	Mon Mar 23 07:01:37 2015 -0400
+++ b/README.rst	Fri Dec 04 07:43:30 2015 -0500
@@ -7,16 +7,17 @@
 ======================
 
 Installation via the Galaxy Tool Shed will take of installing the tool wrapper and
-the pal_finder and primer3_core programs, and setting the appropriate environment
-variables.
+the pal_finder and primer3_core programs (plus additional dependencies), and setting
+the appropriate environment variables.
 
 Manual Installation
 ===================
 
-There are two files to install:
+There are three files to install:
 
 - ``pal_finder_wrapper.xml`` (the Galaxy tool definition)
 - ``pal_finder_wrapper.sh`` (the shell script wrapper)
+- ``pal_finder_filter_and_assembly.py`` (filtering utility)
 
 The suggested location is in a ``tools/pal_finder_wrapper/`` folder. You will then
 need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool
@@ -30,10 +31,15 @@
 - ``Primer3`` version 2.0.0-alpha (see the pal_finder installation notes) can be
   obtained from http://primer3.sourceforge.net/releases.php
 
-The tool wrapper must be able to locate the pal_finder Perl script, the example
-pal_finder config.txt and simple.ref data files, and the primer3_core program - the
-locations of these are taken from the following enviroment variables which you will
-need to set manually:
+Additionally the filtering script needs ``BioPython`` and the ``PANDASeq`` program:
+
+- ``BioPython`` can be obtained from https://pypi.python.org/packages/source/b/biopython/
+- ``PANDASeq`` version 2.8.1 can be obtained from https://github.com/neufeld/pandaseq/
+
+The tool wrapper must be able to locate the ``pal_finder_v0.02.04.pl`` script, the
+example pal_finder ``config.txt`` and ``simple.ref`` data files, and the
+``primer3_core`` program - the locations of these are taken from the following
+environment variables which you will need to set manually:
 
 - ``PALFINDER_SCRIPT_DIR``: location of the pal_finder Perl script (defaults to /usr/bin)
 - ``PALFINDER_DATA_DIR``: location of the pal_finder data files (specifically config.txt
@@ -54,6 +60,10 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+0.02.04.3  - Update to the Illumina filtering script from Graeme Fox (including
+             new option to run ``PANDASeq`` assembly/QC steps), and corresponding
+	     update to the tool; add support for input FASTQs to be a dataset
+	     collection pair.
 0.02.04.2  - Fix bug that causes tool to fail when prefix includes spaces;
              add explicit dependency on Perl 5.16.3.
 0.02.04.1  - Add option to run Graeme Fox's ``pal_finder_filter.pl`` script to
diff -r 771ebe02636f -r b6ccc7dd7b02 pal_finder_filter.pl
--- a/pal_finder_filter.pl	Mon Mar 23 07:01:37 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-############################################################
-# Graeme Fox  -  25/09/2014 - graeme.fox@manchester.ac.uk
-#
-# Program to filter output from pal_finder/primer3 workflow
-#
-# Filters output based on "Primers found (1=y,0=n)" and 
-# "occurrences of forward/reverse amplifiable primer in reads".
-# Filters out any non-perfect microsatellites.
-# Ranks microsatellites by motif size (largest first)
-#
-# Usage:
-# Give it the file that ends with:
-# "(microsatellites_with_read_IDs_and_primer_pairs)].txt"
-#
-# with the following syntax: 
-# perl pal_finder_filter.pl /path/to/input/file
-#
-# File will be created called "pal_finder_filter_output.txt"
-# in the current directory
-############################################################
-
-#!/usr/bin/perl -w
-use strict;
-
-my @lines;                                   
-my @output;                                  
-my $outfile = 'pal_finder_filter_output.txt';     
-
-# Open the file for reading
-my $filename = $ARGV[0];   
-open (my $file, '<', $filename);  
-
-# Grab the headers and store them 
-my $header;
-while (my $line = <$file>)
-{
-  if ($line =~ m/^readPair/) {
-#	push (@output, $line)             
-	$header = $line;	
-	} else {
-
-# Send everything else to array for sorting
-	push (@lines, $line);                  
-	}
-}
-close $file;
-chomp (@lines);
-
-############################################################
-# Filter the file on the "Primers found (1=y,0=n)" column
-# ie. Drop all the lines which do not have primer sequences
-############################################################
-my @temporary1;
-my @temporary2;
-foreach (@lines) { 
-@temporary1 = split(/\t/, $_);
-	if ($temporary1[5] == 1) {
-	push (@temporary2, $_);
-		}
-}
-
-############################################################
-# Filter any lines which do not have "1" in the "Occurances 
-# of Reverse/Forward Primer in Reads" field
-############################################################
-my @temporary3;
-my @temporary4;
-foreach (@temporary2) { 
-@temporary3 = split(/\t/, $_);
-	if ($temporary3[16] == 1 && $temporary3[15] == 1) {
-	push (@temporary4, $_);
-		}
-}
-
-############################################################
-# Filter any non-perfect repeats
-############################################################
-my @temporary5;
-my @temporary6;
-my $count;
-foreach (@temporary4) { 
-	@temporary5 = split(/\t/, $_);
-	$count = ($temporary5[1] =~ tr/\(//);
-		if ($count == 1) {
-		push (@temporary6, $_);
-		}
-}
-############################################################
-# Get size of repeat motif. Order by size (largest first)
-############################################################
-my @temporary7;
-my @temporary8;
-my $motif;
-my $count2 = 2;
-while ($count2 < 10) {
-	foreach (@temporary6) {
-		@temporary7 = split(/\t/, $_);  
-		#get size of motif:
-		$motif = () = ($temporary7[1] =~ /[A-Z]/gi );  
-		if ($motif == $count2) {
-			unshift (@output, join( "\t", @temporary7),"\n");	
-		}
-	}
-$count2++;
-}
-############################################################
-# Open and populate the output file
-############################################################
-unshift (@output, $header);
-open (FILE, "> $outfile") || die "Problem opening $outfile\n";
-print FILE @output;
-close(FILE);
diff -r 771ebe02636f -r b6ccc7dd7b02 pal_finder_filter_and_assembly.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal_finder_filter_and_assembly.py	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,363 @@
+#!/usr/bin/python -tt
+
+###########################################################
+# Graeme Fox - 26/09/2015 - graeme.fox@manchester.ac.uk
+# Tested on (L)ubuntu 15.04 only, with Python 2.7
+###########################################################
+# PROGRAM DESCRIPTION
+# Program to pick optimum loci from the output of pal_finder_v0.02.04
+# and to predict the fragment length.
+#
+# This program can be used to filter pal_finder output to choose the 'optimum' loci.
+#
+# Additionally this program can assemble the two paired end reads,
+# find the primers within HQ assembly, log their positions
+# and calculate the difference to give the fragment length.
+#
+# For best results in your PCR, I suggest doing both.
+#
+###########################################################
+# Requirements:
+# Must have Biopython (www.biopython.org).
+#
+# If you wish to perform the assembly QC step, you must have:
+# PandaSeq (https://github.com/neufeld/pandaseq)
+# PandaSeq must be in your path / able to run from anywhere
+###########################################################
+# Required options:
+# -i forward_paired_ends.fastQ
+# -j reverse_paired_ends.fastQ
+# -p pal_finder output - the "(microsatellites with read IDs and primer pairs)" file
+#
+# By default it does nothing.
+#
+# Non-required options:
+# -assembly:  turn on the pandaseq assembly QC step
+# -primers:  filter microsatellite loci to just those which have primers designed
+# -occurrences:  filter microsatellite loci to those with primers which appear only once in the dataset
+# -rankmotifs:  filter microsatellite loci to just those with perfect motifs. Rank the output by size of motif (largest first)
+#
+###########################################################
+# For repeat analysis, the following extra non-required options may be useful:
+# PandaSeq Assembly, and fastq -> fasta conversion are slow.
+#
+# Do them the first time, generate the files and then skip either, or both steps with the following:
+# -a:  skip assembly step
+# -c:  skip fastq -> fasta conversion step
+#
+# # Just make sure to keep the files in the correct directory with the correct filename
+###########################################################
+#
+# Example usage:
+# pal_finder_filter_and_assembly.py -i R1.fastq -j R2.fastq -p pal_finder_output_(microsatellites with read IDs and primer pairs).tabular -primers -occurrences -rankmotifs -assembly
+#
+###########################################################
+import Bio, subprocess, argparse, csv, os, re, time
+from Bio import SeqIO
+
+# Get values for all the required and optional arguments
+
+parser = argparse.ArgumentParser(description='Frag_length_finder')
+parser.add_argument('-i','--input1', help='Forward paired-end fastq file', required=True)
+parser.add_argument('-j','--input2', help='Reverse paired-end fastq file', required=True)
+parser.add_argument('-p','--pal_finder', help='Output from pal_finder ', required=True)
+parser.add_argument('-assembly','--assembly_QC', help='Perform the PandaSeq based QC', nargs='?', const=1, type=int, required=False)
+parser.add_argument('-a','--skip_assembly', help='If the assembly has already been run, skip it with -a', nargs='?', const=1, type=int, required=False)
+parser.add_argument('-c','--skip_conversion', help='If the fastq to fasta conversion has already been run, skip it with -c', nargs='?', const=1, type=int, required=False)
+parser.add_argument('-primers','--filter_by_primer_column', help='Filter pal_finder output to just those loci which have primers designed', nargs='?', const=1, type=int, required=False)
+parser.add_argument('-occurrences','--filter_by_occurrences_column', help='Filter pal_finder output to just loci with primers which only occur once in the dataset', nargs='?', const=1, type=int, required=False)
+parser.add_argument('-rankmotifs','--filter_and_rank_by_motif_size', help='Filter pal_finder output to just loci which are a perfect repeat unit. Also, rank the loci by motif size (largest first)', nargs='?', const=1, type=int, required=False)
+args = vars(parser.parse_args())
+
+# parse arguments
+
+R1_input = args['input1'];
+R2_input = args['input2'];
+pal_finder_output = args['pal_finder'];
+perform_assembly = args['assembly_QC']
+skip_assembly = args['skip_assembly'];
+skip_conversion = args['skip_conversion'];
+filter_primers = args['filter_by_primer_column'];
+filter_occurrences = args['filter_by_occurrences_column'];
+filter_rank_motifs = args['filter_and_rank_by_motif_size'];
+
+# set default values for arguments
+if perform_assembly is None:
+    perform_assembly = 0
+if skip_assembly is None:
+    skip_assembly = 0
+if skip_conversion is None:
+    skip_conversion = 0
+if filter_primers is None:
+    filter_primers = 0
+if filter_occurrences is None:
+    filter_occurrences = 0
+if filter_rank_motifs is None:
+    filter_rank_motifs = 0
+
+############################################################
+# Function List                                            #
+############################################################
+# Reverse complement a sequence
+def ReverseComplement1(seq):
+    seq_dict = {'A':'T','T':'A','G':'C','C':'G'}
+    return "".join([seq_dict[base] for base in reversed(seq)])
+
+# Convert a .fastq to a .fasta, filter to just lines we want and strip MiSeq barcodes
+def fastq_to_fasta(file):
+    file_name = os.path.splitext(os.path.basename(file))[0]
+    with open(file_name + "_filtered.fasta", "w") as out:
+        for record in SeqIO.parse(file, "fastq"):
+            ID = str(record.id)
+            SEQ = str(record.seq)
+            if ID in wanted:
+                out.write(">" + ID + "\n" + SEQ + "\n")
+
+# strip the miseq barcodes from a fasta file
+def strip_barcodes(file):
+    file_name = os.path.splitext(os.path.basename(file))[0]
+    with open(file_name + "_adapters_removed.fasta", "w") as out:
+        for record in SeqIO.parse(file, "fasta"):
+            match = re.search(r'\S*:', record.id)
+            if match:
+                correct = match.group().rstrip(":")
+            else:
+                correct = str(record.id)
+            SEQ = str(record.seq)
+            if correct in wanted:
+                out.write(">" + correct + "\n" + SEQ + "\n")
+
+############################################################
+# MAIN PROGRAM                                             #
+############################################################
+
+if (perform_assembly == 0 and filter_primers == 0 and filter_occurrences == 0 and filter_rank_motifs == 0):
+    print "\nNo optional arguments supplied."
+    print "\nBy default this program does nothing."
+    print "\nNo files produced and no modifications made."
+    print "\nFinished.\n"
+    exit()
+
+else:
+    print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    print "Checking supplied filtering parameters:"
+    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    if filter_primers == 1:
+        print "-primers flag supplied. Filtering pal_finder output on the \"Primers found (1=y,0=n)\" column."
+    if filter_occurrences == 1:
+        print "-occurrences flag supplied. Filtering pal_finder output on the \"Occurances of Forward Primer in Reads\" and \"Occurances of Reverse Primer in Reads\" columns."
+    if filter_rank_motifs == 1:
+        print "-rankmotifs flag supplied. Filtering pal_finder output on the \"Motifs(bases)\" column to just those with perfect repeats. Ranking output by size of motif (largest first)."
+
+# index the raw fastq files so that the sequences can be pulled out and added to the filtered output file
+R1fastq_sequences_index = SeqIO.index(R1_input,'fastq')
+R2fastq_sequences_index = SeqIO.index(R2_input,'fastq')
+
+# create a set to hold the filtered output
+wanted_lines = set()
+
+# get lines from the pal_finder output which meet filter settings
+
+# read the pal_finder output file into a csv reader
+with open (pal_finder_output) as csvfile_infile:
+    csv_f = csv.reader(csvfile_infile, delimiter='\t')
+    header =  csv_f.next()
+    with open(os.path.splitext(os.path.basename(pal_finder_output))[0] + ".filtered", 'w') as csvfile_outfile:
+        # write the header line for the output file
+        csvfile_outfile.write('\t'.join(header) + "\tR1_Sequence_ID\tR1_Sequence\tR2_Sequence_ID\tR2_Sequence\n")
+        for row in csv_f:
+            # get the sequence ID
+            seq_ID = row[0]
+            # get the raw sequence reads and convert to a format that can go into a tsv file
+            R1_sequence = R1fastq_sequences_index[seq_ID].format("fasta").replace("\n","\t",1).replace("\n","")
+            R2_sequence = R2fastq_sequences_index[seq_ID].format("fasta").replace("\n","\t",1).replace("\n","")
+            seq_info = "\t" + R1_sequence + "\t" + R2_sequence + "\n"
+        # navigate through all different combinations of filter options
+        # if the primer filter is switched on
+            if filter_primers == 1:
+            # check the occurrences of primers field
+                if row[5] == "1":
+                # if filter occurrences of primers is switched on
+                    if filter_occurrences == 1:
+                        # check the occurrences of primers field
+                        if (row[15] == "1" and row[16] == "1"):
+                            # if rank by motif is switched on
+                            if filter_rank_motifs == 1:
+                                    # check for perfect motifs
+                                if row[1].count('(') == 1:
+                                    # all 3 filter switched on - write line out to output
+                                    csvfile_outfile.write('\t'.join(row) + seq_info)
+
+                            else:
+                                csvfile_outfile.write('\t'.join(row) + seq_info)
+                    elif filter_rank_motifs == 1:
+                        if row[1].count('(') == 1:
+                            csvfile_outfile.write('\t'.join(row) + seq_info)
+                    else:
+                        csvfile_outfile.write('\t'.join(row) + seq_info)
+            elif filter_occurrences == 1:
+                if (row[15] == "1" and row[16] == "1"):
+                    if filter_rank_motifs == 1:
+                        if row[1].count('(') == 1:
+                            csvfile_outfile.write('\t'.join(row) + seq_info)
+                    else:
+                        csvfile_outfile.write('\t'.join(row) + seq_info)
+            elif filter_rank_motifs == 1:
+                if row[1].count('(') == 1:
+                    csvfile_outfile.write('\t'.join(row) + seq_info)
+            else:
+                csvfile_outfile.write('\t'.join(row) + seq_info)
+
+# if filter_rank_motifs is active, order the file by the size of the motif
+if filter_rank_motifs == 1:
+    rank_motif = []
+    ranked_list = []
+    # read in the non-ordered file and add every entry to rank_motif list
+    with open(os.path.splitext(os.path.basename(pal_finder_output))[0] + ".filtered") as csvfile_ranksize:
+        csv_rank = csv.reader(csvfile_ranksize, delimiter='\t')
+        header = csv_rank.next()
+        for line in csv_rank:
+            rank_motif.append(line)
+
+    # open the file ready to write the ordered list
+    with open(os.path.splitext(os.path.basename(pal_finder_output))[0] + ".filtered", 'w') as rank_outfile:
+        rankwriter = csv.writer(rank_outfile, delimiter='\t', lineterminator='\n')
+        rankwriter.writerow(header)
+        count = 2
+        while count < 10:
+            for row in rank_motif:
+                # count size of motif
+                motif = re.search(r'[ATCG]*',row[1])
+                if motif:
+                    the_motif = motif.group()
+                    # rank it and write into ranked_list
+                    if len(the_motif) == count:
+                        ranked_list.insert(0, row)
+            count = count +  1
+        # write out the ordered list, into the .filtered file
+        for row in ranked_list:
+            rankwriter.writerow(row)
+
+print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+print "Checking assembly flags supplied:"
+print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+if perform_assembly == 0:
+    print "Assembly flag not supplied. Not performing assembly QC.\n"
+if perform_assembly == 1:
+    print "-assembly flag supplied: Performing PandaSeq assembly quality checks."
+    time.sleep(5)
+
+# Get readID, F primers, R primers and motifs from the filtered pal_finder output
+    seqIDs = []
+    motif = []
+    F_primers = []
+    R_primers = []
+    with open(os.path.splitext(os.path.basename(pal_finder_output))[0] + ".filtered") as input_csv:
+        pal_finder_csv = csv.reader(input_csv, delimiter='\t')
+        header = pal_finder_csv.next()
+        for row in pal_finder_csv:
+            seqIDs.append(row[0])
+            motif.append(row[1])
+            F_primers.append(row[7])
+            R_primers.append(row[9])
+
+# Get a list of just the unique IDs we want
+    wanted = set()
+    for line in seqIDs:
+        wanted.add(line)
+
+# Assemble the paired end reads into overlapping contigs using PandaSeq
+# (can be skipped with the -a flag if assembly has already been run
+# and the appropriate files are in the same directory as the script,
+# and named "Assembly.fasta" and "Assembly_adapters_removed.fasta")
+#
+# I suggest you run this script WITHOUT -a first, and then use the -a flag in
+# any subsequent reanalysis
+
+    if skip_assembly == 0:
+        pandaseq_command = 'pandaseq -A pear -f ' + R1_input + ' -r ' + R2_input + ' -o 25 -t 0.95 -w Assembly.fasta'
+        subprocess.call(pandaseq_command, shell=True)
+        strip_barcodes("Assembly.fasta")
+        print "\nPaired end reads been assembled into overlapping reads."
+        print "\nFor future analysis, you can skip this assembly step using the -a flag, provided that the assembly.fasta file is intact and in the same location."
+    else:
+        print "\n(Skipping the assembly step as you provided the -a flag)"
+
+# FastQ files need to be converted to fasta. Again, I suggest running this section
+# the first time WITHOUT the -c flag and then skipping it later using the -c flag
+# Make sure the fasta files are in the same location and do not change the filenames
+
+    if skip_conversion ==0:
+        fastq_to_fasta(R1_input)
+        fastq_to_fasta(R2_input)
+        print "\nThe input fastq files have been converted to the fasta format."
+        print "\nFor any future analysis, you can skip this conversion step using the -c flag, provided that the fasta files are intact and in the same location."
+    else:
+        print "\n(Skipping the fastq -> fasta conversion as you provided the -c flag)"
+
+# get the files and everything else we will need
+    assembly_file = "Assembly_adapters_removed.fasta" # Assembled fasta file
+    R1_fasta = os.path.splitext(os.path.basename(R1_input))[0] + "_filtered.fasta"    # filtered R1 reads
+    R2_fasta = os.path.splitext(os.path.basename(R2_input))[0] + "_filtered.fasta"   # filtered R2 reads
+    outputfilename = os.path.splitext(os.path.basename(R1_input))[0]
+
+# parse the files with SeqIO
+    assembly_sequences = SeqIO.parse(assembly_file,'fasta')
+    R1fasta_sequences = SeqIO.parse(R1_fasta,'fasta')
+
+# create some empty lists to hold the ID tags we are interested in
+    assembly_IDs = []
+    fasta_IDs = []
+
+# populate the above lists with sequence IDs
+    for sequence in assembly_sequences:
+        assembly_IDs.append(sequence.id)
+    for sequence in R1fasta_sequences:
+        fasta_IDs.append(sequence.id)
+
+# Index the assembly fasta file
+    assembly_sequences_index = SeqIO.index(assembly_file,'fasta')
+    R1fasta_sequences_index = SeqIO.index(R1_fasta,'fasta')
+    R2fasta_sequences_index = SeqIO.index(R2_fasta,'fasta')
+
+# prepare the output file
+    with open (outputfilename + "_pal_finder_assembly_output.txt", 'w') as outputfile:
+        outputfile.write("readPairID\t Forward Primer\t F Primer Position in Assembled Read\t Reverse Primer\t R Primer Position in Assembled Read\t Predicted Amplicon Size (bp)\t Motifs(bases)\t Assembled Read ID\t Assembled Read Sequence\t Raw Forward Read ID\t Raw Forward Read Sequence\t Raw Reverse Read ID\t Raw Reverse Read Sequence\n")
+
+# cycle through parameters from the pal_finder output
+        for x, y, z, a in zip(seqIDs, F_primers, R_primers, motif):
+            if str(x) in assembly_IDs:
+            # get the raw sequences ready to go into the output file
+                assembly_seq = (assembly_sequences_index.get_raw(x).decode())
+            # fasta entries need to be converted to single line so they sit nicely in the output
+                assembly_output = assembly_seq.replace("\n","\t")
+                R1_fasta_seq = (R1fasta_sequences_index.get_raw(x).decode())
+                R1_output = R1_fasta_seq.replace("\n","\t",1).replace("\n","")
+                #R1_output = R1_output.replace("\n","")
+                R2_fasta_seq = (R2fasta_sequences_index.get_raw(x).decode())
+                R2_output = R2_fasta_seq.replace("\n","\t",1).replace("\n","")
+                #R2_output = R2_output.replace("\n","")
+                assembly_no_id = '\n'.join(assembly_seq.split('\n')[1:])
+
+# check that both primer sequences can be seen in the assembled contig
+                if y or ReverseComplement1(y) in assembly_no_id and z or ReverseComplement1(z) in assembly_no_id:
+                    if y in assembly_no_id:
+                    # get the positions of the primers in the assembly to predict fragment length
+                        F_position = assembly_no_id.index(y)+len(y)+1
+                    if ReverseComplement1(y) in assembly_no_id:
+                        F_position = assembly_no_id.index(ReverseComplement1(y))+len(ReverseComplement1(y))+1
+                    if z in assembly_no_id:
+                        R_position = assembly_no_id.index(z)+1
+                    if ReverseComplement1(z) in assembly_no_id:
+                        R_position = assembly_no_id.index(ReverseComplement1(z))+1
+                # calculate fragment length
+                    fragment_length = R_position-F_position
+
+# write everything out into the output file
+                    output = (str(x) + "\t" + y + "\t" + str(F_position) + "\t" + (z) + "\t" + str(R_position) + "\t" + str(fragment_length) + "\t" + a + "\t" + assembly_output + R1_output + "\t" + R2_output + "\n")
+                    outputfile.write(output)
+    print "\nFinished\n"
+else:
+    if (skip_assembly == 1 or skip_conversion == 1):
+        print "\nERROR: You cannot supply the -a flag or the -c flag without also supplying the -assembly flag.\n"
+        print "\nFinished\n"
diff -r 771ebe02636f -r b6ccc7dd7b02 pal_finder_wrapper.sh
--- a/pal_finder_wrapper.sh	Mon Mar 23 07:01:37 2015 -0400
+++ b/pal_finder_wrapper.sh	Fri Dec 04 07:43:30 2015 -0500
@@ -26,8 +26,11 @@
 # --primer-opt-tm VALUE: optimum melting temperature (Celsius)
 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
 # --output_config_file FNAME: write a copy of the config.txt file to FNAME
-# --filter_microsats FNAME: run Graeme Fox's Perl script to filter and sort the
-#                           microsatellites from pal_finder and write to FNAME
+# --filter_microsats FNAME: write output of filter options to FNAME
+# -assembly FNAME: run the 'assembly' filter option and write to FNAME
+# -primers: run the 'primers' filter option
+# -occurrences: run the 'occurrences' filter option
+# -rankmotifs: run the 'rankmotifs' filter option
 #
 # pal_finder is available from http://sourceforge.net/projects/palfinder/
 #
@@ -55,9 +58,9 @@
 : ${PRIMER3_CORE_EXE:=primer3_core}
 #
 # Filter script is in the same directory as this script
-PALFINDER_FILTER_PL=$(dirname $0)/pal_finder_filter.pl
-if [ ! -f $PALFINDER_FILTER_PL ] ; then
-    echo No pal_finder_filter.pl script >&2
+PALFINDER_FILTER=$(dirname $0)/pal_finder_filter_and_assembly.py
+if [ ! -f $PALFINDER_FILTER ] ; then
+    echo No $PALFINDER_FILTER script >&2
     exit 1
 fi
 #
@@ -104,7 +107,9 @@
 PRIMER_MIN_TM=
 PRIMER_PAIR_MAX_DIFF_TM=
 OUTPUT_CONFIG_FILE=
+OUTPUT_ASSEMBLY=
 FILTERED_MICROSATS=
+FILTER_OPTIONS=
 #
 # Collect command line arguments
 if [ $# -lt 2 ] ; then
@@ -129,8 +134,8 @@
     case "$1" in
 	--primer-prefix)
 	    shift
-	    # Convert spaces to underscores in prefix
-	    PRIMER_PREFIX=$(echo $1 | tr " " "_")
+	    # Convert all non-alphanumeric characters to underscores in prefix
+	    PRIMER_PREFIX=$(echo -n $1 | tr -s -c "[:alnum:]" "_")
 	    ;;
 	--2merMinReps)
 	    shift
@@ -208,6 +213,14 @@
 	    shift
 	    FILTERED_MICROSATS=$1
 	    ;;
+	-primers|-occurrences|-rankmotifs)
+	    FILTER_OPTIONS="$FILTER_OPTIONS $1"
+	    ;;
+	-assembly)
+	    FILTER_OPTIONS="$FILTER_OPTIONS $1"
+	    shift
+	    OUTPUT_ASSEMBLY=$1
+	    ;;
 	*)
 	    echo Unknown option: $1 >&2
 	    exit 1
@@ -309,15 +322,25 @@
     exit 1
 fi
 #
-# Run the pal_finder_filter.pl script from Graeme Fox
-if [ ! -z "$FILTERED_MICROSATS" ] ; then
-    echo "### Running filtering script ###"
-    perl $PALFINDER_FILTER_PL Output/PAL_summary.txt 2>&1
+# Sort outputs into a consistent order regardless of Perl version
+echo "### Sorting outputs ###"
+head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt
+if [ "$PLATFORM" == "Illumina" ] ; then
+    grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt
+else
+    grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt
+fi
+mv Output/PAL_summary.sorted.txt Output/PAL_summary.txt
+#
+# Run the filtering & assembly script
+if [ ! -z "$FILTERED_MICROSATS" ] || [ ! -z "$OUTPUT_ASSEMBLY" ] ; then
+    echo "### Running filtering & assembly script ###"
+    python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 2>&1
     if [ $? -ne 0 ] ; then
-	echo ERROR pal_finder_filter.pl exited with non-zero status >&2
+	echo ERROR $PALFINDER_FILTER exited with non-zero status >&2
 	exit 1
-    elif [ ! -f pal_finder_filter_output.txt ] ; then
-	echo ERROR no output from pal_finder_filter.pl >&2
+    elif [ ! -f PAL_summary.filtered ] ; then
+	echo ERROR no output from $PALFINDER_FILTER >&2
 	exit 1
     fi
 fi
@@ -330,8 +353,14 @@
 if [ -f Output/PAL_summary.txt ] ; then
     /bin/mv Output/PAL_summary.txt $PAL_SUMMARY
 fi
-if [ ! -z "$FILTERED_MICROSATS" ] && [ -f pal_finder_filter_output.txt ] ; then
-    /bin/mv pal_finder_filter_output.txt $FILTERED_MICROSATS
+if [ ! -z "$FILTERED_MICROSATS" ] && [ -f PAL_summary.filtered ] ; then
+    /bin/mv PAL_summary.filtered $FILTERED_MICROSATS
+fi
+if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then
+    assembly=${fastq_r1%.*}_pal_finder_assembly_output.txt
+    if [ -f "$assembly" ] ; then
+	/bin/mv $assembly "$OUTPUT_ASSEMBLY"
+    fi
 fi
 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
     /bin/mv config.txt $OUTPUT_CONFIG_FILE
diff -r 771ebe02636f -r b6ccc7dd7b02 pal_finder_wrapper.xml
--- a/pal_finder_wrapper.xml	Mon Mar 23 07:01:37 2015 -0400
+++ b/pal_finder_wrapper.xml	Fri Dec 04 07:43:30 2015 -0500
@@ -1,17 +1,28 @@
-<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.2">
-  <description>Find microsatellite repeat elements sequencing reads and design PCR primers to amplify them</description>
+<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.3">
+  <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
+  <requirements>
+    <requirement type="package" version="5.16.3">perl</requirement>
+    <requirement type="package" version="0.02.04">pal_finder</requirement>
+    <requirement type="package" version="2.0.0">primer3_core</requirement>
+    <requirement type="package" version="1.65">biopython</requirement>
+    <requirement type="package" version="2.8.1">pandaseq</requirement>
+  </requirements>
   <command interpreter="bash">pal_finder_wrapper.sh
   #if str( $platform.platform_type ) == "illumina"
-    $platform.input_fastq_r1 $platform.input_fastq_r2
+    #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type
+    #if $paired_input_type == "pair_of_files"
+      "$platform.paired_input_type_conditional.input_fastq_r1"
+      "$platform.paired_input_type_conditional.input_fastq_r2"
+    #else
+      "$platform.paired_input_type_conditional.input_fastq_pair.forward"
+      "$platform.paired_input_type_conditional.input_fastq_pair.reverse"
+    #end if
   #else
-    --454 $platform.input_fasta
+    --454 "$platform.input_fasta"
   #end if
   $output_microsat_summary $output_pal_summary
-  #if str( $platform.platform_type ) == "illumina" and $platform.filter_microsats
-    --filter_microsats $output_filtered_microsats
-  #end if
   #if $keep_config_file
-    --output_config_file $output_config_file
+    --output_config_file "$output_config_file"
   #end if
   --primer-prefix "$primer_prefix"
   --2merMinReps $min_2mer_repeats
@@ -35,12 +46,18 @@
   #if str( $mispriming.mispriming_options ) == "custom"
   --primer-mispriming-library $mispriming.mispriming_library
   #end if
+  #if str( $platform.platform_type ) == "illumina"
+    #if $platform.filters
+      #for $filter in str($platform.filters).split(',')
+        $filter
+        --filter_microsats "$output_filtered_microsats"
+      #end for
+    #end if
+    #if str( $platform.assembly ) == '-assembly'
+      $platform.assembly "$output_assembly"
+    #end if
+  #end if
   </command>
-  <requirements>
-    <requirement type="package" version="5.16.3">perl</requirement>
-    <requirement type="package" version="0.02.04">pal_finder</requirement>
-    <requirement type="package" version="2.0.0">primer3_core</requirement>
-  </requirements>
   <inputs>
     <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" />
     <conditional name="platform">
@@ -49,11 +66,33 @@
 	<option value="454">454</option>
       </param>
       <when value="illumina">
-	<param name="input_fastq_r1" type="data" format="fastqsanger" label="Illumina fastq file (read 1)" />
-	<param name="input_fastq_r2" type="data" format="fastqsanger" label="Illumina fastq file (read 2)" />
-	<param name="filter_microsats" type="boolean" truevalue="True" falsevalue="False"
-	       label="Filter and sort the microsatellites" checked="True"
-	       help="Filter pal_finder results to only include lines with primer sequences and remove non-perfect repeats" />
+	<conditional name="paired_input_type_conditional">
+          <param name="paired_input_type" type="select" label="Input Type">
+            <option value="pair_of_files" selected="true">Pair of datasets</option>
+            <option value="collection">Dataset collection pair</option>
+	  </param>
+	  <when value="pair_of_files">
+	    <param name="input_fastq_r1" type="data" format="fastqsanger"
+		   label="Illumina fastq file (read 1)" />
+	    <param name="input_fastq_r2" type="data" format="fastqsanger"
+		   label="Illumina fastq file (read 2)" />
+	  </when>
+	  <when value="collection">
+	    <param name="input_fastq_pair" format="fastqsanger"
+		   type="data_collection" collection_type="paired"
+		   label="Select FASTQ dataset collection with R1/R2 pair" />
+	  </when>
+	</conditional>
+	<param name="filters" type="select" display="checkboxes"
+	       multiple="True" label="Filters to apply to the pal_finder results"
+	       help="Apply none, one or more filters to refine results">
+          <option value="-primers" selected="True">Only include loci with designed primers</option>
+          <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option>
+          <option value="-rankmotifs" selected="True">Only include loci with 'perfect' motifs, and rank by motif size</option>
+        </param>
+	<param name="assembly" type="boolean"
+	       checked="True" truevalue="-assembly" falsevalue=""
+               label="Use PANDAseq to assemble paired-end reads and confirm primer sequences are present in high-quality assembly" />
       </when>
       <when value="454">
 	<param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />
@@ -117,12 +156,15 @@
 	   help="Can be used to run pal_finder outside of Galaxy" />
   </inputs>
   <outputs>
-    <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellite types)" />
-    <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellites with read IDs and primer pairs)" />
-    <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (filtered and sorted microsatellites)">
-      <filter>platform['platform_type'] == 'illumina' and platform['filter_microsats']</filter>
+    <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: all microsatellites (full details)" />
+    <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: filtered microsatellites (full details)">
+      <filter>platform['platform_type'] == 'illumina' and platform['filters'] is not None</filter>
     </data>
-    <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (config file)">
+    <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" />
+    <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly">
+      <filter>platform['assembly'] is True</filter>
+    </data>
+    <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file">
       <filter>keep_config_file is True</filter>
     </data>
   </outputs>
@@ -132,24 +174,77 @@
       <param name="platform_type" value="illumina" />
       <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
       <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
-      <!-- 
-      **NB** outputs have to be specified in order that they appear in the
-      tool (which is the order they will be written to the history) - the
-      test framework seems to use the order and ignores the "name" attribute
-      -->
+      <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
+      <output name="output_pal_summary" file="illuminaPE_microsats.out" />
+      <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" />
+      <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" />
+    </test>
+    <test>
+      <!-- Test with Illumina input as a paired dataset collection -->
+      <param name="platform_type" value="illumina" />
+      <param name="paired_input_type" value="collection" />
+      <param name="input_fastq_pair">
+	<collection type="paired">
+	  <element name="forward" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+	  <element name="reverse" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+	</collection>
+      </param>
       <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
       <output name="output_pal_summary" file="illuminaPE_microsats.out" />
       <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" />
+      <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" />
+    </test>
+    <test>
+      <!-- Test with Illumina input, no filters, PANDAseq assembly only
+	   ('-assembly' option) -->
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
+      <output name="output_pal_summary" file="illuminaPE_microsats.out" />
+      <output name="output_assembly" file="illuminaPE_assembly.out" />
+    </test>
+    <test>
+      <!-- Test with Illumina input filter to loci with primers
+	   ('-primers' option) -->
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="-primers" />
+      <param name="assembly" value="false" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
+      <output name="output_pal_summary" file="illuminaPE_microsats.out" />
+      <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_primers.out" />
+    </test>
+    <test>
+      <!-- Test with Illumina input filter to loci which appear only once
+	   ('-occurrences' option) -->
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="-occurrences" />
+      <param name="assembly" value="false" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
+      <output name="output_pal_summary" file="illuminaPE_microsats.out" />
+      <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_occurrences.out" />
+    </test>
+    <test>
+      <!-- Test with Illumina input filter and rank loci with perfect motifs
+	   ('-rankmotifs' option) -->
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="-rankmotifs" />
+      <param name="assembly" value="false" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
+      <output name="output_pal_summary" file="illuminaPE_microsats.out" />
+      <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_rankmotifs.out" />
     </test>
     <test>
       <!-- Test with 454 input -->
       <param name="platform_type" value="454" />
       <param name="input_fasta" value="454_in.fa" ftype="fasta" />
-      <!-- 
-      **NB** outputs have to be specified in order that they appear in the
-      tool (which is the order they will be written to the history) - the
-      test framework seems to use the order and ignores the "name" attribute
-      -->
       <output name="output_microsat_summary" file="454_microsat_types.out" />
       <output name="output_pal_summary" file="454_microsats.out" />
     </test>
@@ -163,9 +258,15 @@
 directly from raw 454 or Illumina paired-end sequencing reads. It then designs PCR
 primers to amplify these repeat loci (Potentially Amplifiable Loci: PAL).
 
-Optionally for Illumina data, the output from pal_finder can also be filtered to
-remove any motifs without primer sequences, and with non-perfect microsatellites.
-The microsatellites are then ranked by motif size (largest to smallest).
+Optionally for Illumina data, one or more filters can be applied to the output from
+pal_finder to:
+
+ * Only include loci with designed primers
+ * Exclude loci where the primer sequences occur more than once in the reads
+ * Only include loci with 'perfect' motifs (and rank by motif size, largest to
+   smallest)
+ * Use PANDAseq to assemble paired-end reads and confirm primer sequences are
+   present in high-quality assembly
 
 Pal_finder runs the primer3_core program; information on the settings used in
 primer3_core can be found in the Primer3 manual at
@@ -199,12 +300,12 @@
 The paper is available at
 http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf
 
-The filtering and sorting of the pal_finder output for Illumina data is performed
-using a Perl script written by Graeme Fox at the University of Manchester, and which
-is included with this tool.
+The filtering and assembly of the pal_finder output for Illumina data is performed
+using a Python utility written by Graeme Fox at the University of Manchester, which
+is included with this tool; this utility uses the BioPython and PANDAseq packages.
 
 Please kindly acknowledge both this Galaxy tool, the pal_finder and primer3 packages, and
-the utility script if you use it in your work.
+the utility script and its dependencies if you use it in your work.
   </help>
   <citations>
     <!--
@@ -214,7 +315,7 @@
     -->
     <citation type="doi">10.1371/journal.pone.0030953</citation>
     <citation type="bibtex">@Article{pmid10547847,
-    Author="Rozen, S.  and Skaletsky, H. ",
+    Author="Rozen, S. and Skaletsky, H. ",
     Title="{{P}rimer3 on the {W}{W}{W} for general users and for biologist programmers}",
     Journal="Methods Mol. Biol.",
     Year="2000",
@@ -222,5 +323,7 @@
     Pages="365--386",
     URL="{http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf}"
     }</citation>
+    <citation type="doi">10.1093/bioinformatics/btp163</citation>
+    <citation type="doi">10.1186/1471-2105-13-31</citation>
   </citations>
 </tool>
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_assembly.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_assembly.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,2 @@
+readPairID	 Forward Primer	 F Primer Position in Assembled Read	 Reverse Primer	 R Primer Position in Assembled Read	 Predicted Amplicon Size (bp)	 Motifs(bases)	 Assembled Read ID	 Assembled Read Sequence	 Raw Forward Read ID	 Raw Forward Read Sequence	 Raw Reverse Read ID	 Raw Reverse Read Sequence
+ILLUMINA-545855:49:FC61RLR:2:1:19063:1614		1		1	0	AT(14) AT(14) AT(14) AT(14) 	>ILLUMINA-545855:49:FC61RLR:2:1:19063:1614	TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTGAAATGAGAAAAAAATATATATATATATGTTTATATATATATATA	>ILLUMINA-545855:49:FC61RLR:2:1:19063:1614	TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG	>ILLUMINA-545855:49:FC61RLR:2:1:19063:1614	TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_assembly_after_filters.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_assembly_after_filters.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,1 @@
+readPairID	 Forward Primer	 F Primer Position in Assembled Read	 Reverse Primer	 R Primer Position in Assembled Read	 Predicted Amplicon Size (bp)	 Motifs(bases)	 Assembled Read ID	 Assembled Read Sequence	 Raw Forward Read ID	 Raw Forward Read Sequence	 Raw Reverse Read ID	 Raw Reverse Read Sequence
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_filtered_microsats.out
--- a/test-data/illuminaPE_filtered_microsats.out	Mon Mar 23 07:01:37 2015 -0400
+++ b/test-data/illuminaPE_filtered_microsats.out	Fri Dec 04 07:43:30 2015 -0500
@@ -1,4 +1,4 @@
-readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs
-ILLUMINA-545855_0049_FC61RLR:2:1:8157:1636#0	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:10979:1695#0	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:1978:1220#0	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1
+readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs	R1_Sequence_ID	R1_Sequence	R2_Sequence_ID	R2_Sequence
+ILLUMINA-545855:49:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
+ILLUMINA-545855:49:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
+ILLUMINA-545855:49:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_filtered_microsats_assembly.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_filtered_microsats_assembly.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,11 @@
+readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs
+ILLUMINA-545855:0049:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1
+ILLUMINA-545855:0049:FC61RLR:2:1:5879:1238	AT(12) 	12			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:17449:1584	AC(36) 	36			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1
+ILLUMINA-545855:0049:FC61RLR:2:1:8044:1926	AT(12) 	12			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:5626:1554	AT(14) AC(16) AC(16) AT(12) 	58			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1
+ILLUMINA-545855:0049:FC61RLR:2:1:19063:1614	AT(14) AT(14) AT(14) AT(14) 	56			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:6204:1090	TC(12) 	12			0													
+ILLUMINA-545855:0049:FC61RLR:2:1:8899:1514	AC(12) AC(12) 	24			1	test_2	TCTTTATCTAAACACATCCTGAAATACC	test_1	AAACGCAATTATTTTGAGATGTCC	AC(12) AC(12) 	24	1			1	2	1	1
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_filtered_microsats_occurrences.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_filtered_microsats_occurrences.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,4 @@
+readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs	R1_Sequence_ID	R1_Sequence	R2_Sequence_ID	R2_Sequence
+ILLUMINA-545855:49:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
+ILLUMINA-545855:49:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
+ILLUMINA-545855:49:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_filtered_microsats_primers.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_filtered_microsats_primers.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,5 @@
+readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs	R1_Sequence_ID	R1_Sequence	R2_Sequence_ID	R2_Sequence
+ILLUMINA-545855:49:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
+ILLUMINA-545855:49:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
+ILLUMINA-545855:49:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
+ILLUMINA-545855:49:FC61RLR:2:1:8899:1514	AC(12) AC(12) 	24			1	test_2	TCTTTATCTAAACACATCCTGAAATACC	test_1	AAACGCAATTATTTTGAGATGTCC	AC(12) AC(12) 	24	1			1	2	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA	TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC	>ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA	TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_filtered_microsats_rankmotifs.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminaPE_filtered_microsats_rankmotifs.out	Fri Dec 04 07:43:30 2015 -0500
@@ -0,0 +1,8 @@
+readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs	R1_Sequence_ID	R1_Sequence	R2_Sequence_ID	R2_Sequence
+ILLUMINA-545855:49:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA	TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT	>ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA	TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
+ILLUMINA-545855:49:FC61RLR:2:1:8044:1926	AT(12) 	12			0														>ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA	TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG	>ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA	TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT
+ILLUMINA-545855:49:FC61RLR:2:1:6204:1090	TC(12) 	12			0														>ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA	TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA	>ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA	TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA
+ILLUMINA-545855:49:FC61RLR:2:1:5879:1238	AT(12) 	12			0														>ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA	TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC	>ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA	TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT
+ILLUMINA-545855:49:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA	TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA	>ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA	TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
+ILLUMINA-545855:49:FC61RLR:2:1:17449:1584	AC(36) 	36			0														>ILLUMINA-545855:49:FC61RLR:2:1:17449:1584 1:N:0:TCCTGA	TCGTAGCATGTGTATGCTTTGGGGTTTCATGCTGTTGATTCATAACTGCTGCTGGCTGTAGACTGAACCTTCTGGGTAGGAGGAATATGCTTAGACAAGCACACCAGTCAGCCCGA	>ILLUMINA-545855:49:FC61RLR:2:1:17449:1584 2:N:0:TCCTGA	TCTGTGTGTGAGCACACACACACACACACACACACACACACACACACATGCAGGTACTTGCTCTGCCACCCCTGGCGGGCTGCGTGGTGTGCCTGACGACGTATTCTAATCCTACA
+ILLUMINA-545855:49:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA	TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA	>ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA	TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_microsats.out
--- a/test-data/illuminaPE_microsats.out	Mon Mar 23 07:01:37 2015 -0400
+++ b/test-data/illuminaPE_microsats.out	Fri Dec 04 07:43:30 2015 -0500
@@ -1,11 +1,11 @@
 readPairID	Motifs(bases)	Bases in all Motifs	Possible Extended	Possible Spanning	Primers found (1=y,0=n)	F Primer Name	Forward Primer	R Primer Name	Reverse Primer	Amplicon Motifs	Number motif bases in amplicon	Primers on sep reads	Extend with primers	Spand with primers	Occurances of Forward Primer in Reads	Occurances of Reverse Primer in Reads	Occurances of Amplifiable Primer Pair in Reads	Occurances of Amplifiable Primer Pair in PALs
-ILLUMINA-545855_0049_FC61RLR:2:1:8044:1926#0	AT(12) 	12			0													
-ILLUMINA-545855_0049_FC61RLR:2:1:1978:1220#0	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:5879:1238#0	AT(12) 	12			0													
-ILLUMINA-545855_0049_FC61RLR:2:1:8899:1514#0	AC(12) AC(12) 	24			1	test_2	TCTTTATCTAAACACATCCTGAAATACC	test_1	AAACGCAATTATTTTGAGATGTCC	AC(12) AC(12) 	24	1			1	2	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:10979:1695#0	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:5626:1554#0	AT(14) AC(16) AC(16) AT(12) 	58			0													
-ILLUMINA-545855_0049_FC61RLR:2:1:8157:1636#0	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1
-ILLUMINA-545855_0049_FC61RLR:2:1:19063:1614#0	AT(14) AT(14) AT(14) AT(14) 	56			0													
-ILLUMINA-545855_0049_FC61RLR:2:1:17449:1584#0	AC(36) 	36			0													
-ILLUMINA-545855_0049_FC61RLR:2:1:6204:1090#0	TC(12) 	12			0													
+ILLUMINA-545855:49:FC61RLR:2:1:10979:1695	TC(14) 	14			1	test_7	TTCTCCCACTATATTTTGCATTGG	test_8	TCCAGACTGAAGCTACCCTGG	TC(14) 	14	1			1	1	1	1
+ILLUMINA-545855:49:FC61RLR:2:1:17449:1584	AC(36) 	36			0													
+ILLUMINA-545855:49:FC61RLR:2:1:19063:1614	AT(14) AT(14) AT(14) AT(14) 	56			0													
+ILLUMINA-545855:49:FC61RLR:2:1:1978:1220	AC(12) 	12			1	test_3	GCAGTAAACAAAGGCAAAGGG	test_4	CCTGGGCAGAGGTGTTCC	AC(12) 	12	1			1	1	1	1
+ILLUMINA-545855:49:FC61RLR:2:1:5626:1554	AT(14) AC(16) AC(16) AT(12) 	58			0													
+ILLUMINA-545855:49:FC61RLR:2:1:5879:1238	AT(12) 	12			0													
+ILLUMINA-545855:49:FC61RLR:2:1:6204:1090	TC(12) 	12			0													
+ILLUMINA-545855:49:FC61RLR:2:1:8044:1926	AT(12) 	12			0													
+ILLUMINA-545855:49:FC61RLR:2:1:8157:1636	AC(12) 	12			1	test_5	AAGTACAGTGGGGAGGCTGG	test_6	TTTTCTACACAGCTCAAGTAGCCC	AC(12) 	12	1			1	1	1	1
+ILLUMINA-545855:49:FC61RLR:2:1:8899:1514	AC(12) AC(12) 	24			1	test_2	TCTTTATCTAAACACATCCTGAAATACC	test_1	AAACGCAATTATTTTGAGATGTCC	AC(12) AC(12) 	24	1			1	2	1	1
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_r1.fq
--- a/test-data/illuminaPE_r1.fq	Mon Mar 23 07:01:37 2015 -0400
+++ b/test-data/illuminaPE_r1.fq	Fri Dec 04 07:43:30 2015 -0500
@@ -1,40 +1,40 @@
-@ILLUMINA-545855_0049_FC61RLR:2:1:10979:1695#0/1
+@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA
 TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA
 +
-hhhhhghhhfgghhcfghhhhhhghhhhhhhhhhhhgfhhhhgfhhhhggghggfghhggdgghfgfcgcgggffgdf`gfcfdgdfdfafbdcaccfddecddbfcdfcdcdcdW
-@ILLUMINA-545855_0049_FC61RLR:2:1:17449:1584#0/1
+IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8
+@ILLUMINA-545855:49:FC61RLR:2:1:17449:1584 1:N:0:TCCTGA
 TCGTAGCATGTGTATGCTTTGGGGTTTCATGCTGTTGATTCATAACTGCTGCTGGCTGTAGACTGAACCTTCTGGGTAGGAGGAATATGCTTAGACAAGCACACCAGTCAGCCCGA
 +
-hhhhhhhhhhghhhhhfhhhhehhhhhhfhhhhfahgahhhghhhhghhgggdgeggedegdedhbgdffcacaccM\^^[`_^^^aaaacaaa^_bddd_aaa_a[VQ^Z_BBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:19063:1614#0/1
+IIIIIIIIIIHIIIIIGIIIIFIIIIIIGIIIIGBIHBIIIHIIIIHIIHHHEHFHHFEFHEFEICHEGGDBDBDD.=??<A@???BBBBDBBB?@CEEE@BBB@B<72?;@####
+@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA
 TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG
 +
-hhhhhhhfhdhhhhfhhhgghghfhfhhhhhghhhhgfadhhhhghegghgehhhhegehghhgee]dddacfccZacWccaaccafgfggggded\cabcdddadadfgf`eg_d
-@ILLUMINA-545855_0049_FC61RLR:2:1:1978:1220#0/1
+IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E
+@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA
 TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA
 +
-cccccacaccaV^aaaTaa]P^[^WW]ccc^SGURUVZ]^Q[PUS\Z]W[Occc]]`U`^]ZZU]JU]][]SLWSWSWWWE_c__cc[cZc]]^[XccZUccb[ccZ[WW_BBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:5626:1554#0/1
+DDDDDBDBDDB7?BBB5BB>1?<?88>DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD<D;D>>?<9DD;6DDC<DD;<88@#####
+@ILLUMINA-545855:49:FC61RLR:2:1:5626:1554 1:N:0:TCCTGA
 TCTAATATTATATATATCTGTGTGTGTATATATATATATACACACACACACACACATTGCTATTGTTAAATTAACCACCTTTTTATTTATAATACTATTATAACACTTATCTGTAT
 +
-R[\[[Zaddbb`d`ada_aY_VUVXQPX[Y]\a\__^^`XZa____BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:5879:1238#0/1
+3<=<<;BEECCAEABEB@B:@7679219<:>=B=@@??A9;B@@@@######################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA
 TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC
 +
-BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:6204:1090#0/1
+####################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA
 TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA
 +
-BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:8044:1926#0/1
+####################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA
 TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG
 +
-gggggggggggggggegefegdeeceXQ\_\]ZZZ\gggfggggggggggggggggfgggeggggegecggggggggggggggggfggffggggggggggdggggfdgggded]da
-@ILLUMINA-545855_0049_FC61RLR:2:1:8157:1636#0/1
+HHHHHHHHHHHHHHHFHFGFHEFFDF92=@=>;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB
+@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA
 TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT
 +
-gggggggggggggegggggfggggggfggggfgggggg]ggffffeeggggfgfggeggggffafcb`b]bacaccbefdc_acca_aaaadbbc_[bb]b\^X\\_bdba\aaaW
-@ILLUMINA-545855_0049_FC61RLR:2:1:8899:1514#0/1
+HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@<CC>C=?9==@CECB=BBB8
+@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA
 TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC
 +
-hhhhhghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfhhhhhfhhhhhhhhhhhhhhggfhhhhghgggghgggggggfgggggfegdgdggggggghh]
+IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII>
diff -r 771ebe02636f -r b6ccc7dd7b02 test-data/illuminaPE_r2.fq
--- a/test-data/illuminaPE_r2.fq	Mon Mar 23 07:01:37 2015 -0400
+++ b/test-data/illuminaPE_r2.fq	Fri Dec 04 07:43:30 2015 -0500
@@ -1,40 +1,40 @@
-@ILLUMINA-545855_0049_FC61RLR:2:1:10979:1695#0/2
+@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA
 TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
 +
-hhhhhghhhhhghhhhghghhhhhhhghhhghhhghgfhhhhhhgdgggggggghhghhgeggdgghfggfgfhgfggef`fhdggfdfgaehecagggfcegacagffefcWda_
-@ILLUMINA-545855_0049_FC61RLR:2:1:17449:1584#0/2
+IIIIIHIIIIIHIIIIHIHIIIIIIIHIIIHIIIHIHGIIIIIIHEHHHHHHHHIIHIIHFHHEHHIGHHGHGIHGHHFGAGIEHHGEGHBFIFDBHHHGDFHBDBHGGFGD8EB@
+@ILLUMINA-545855:49:FC61RLR:2:1:17449:1584 2:N:0:TCCTGA
 TCTGTGTGTGAGCACACACACACACACACACACACACACACACACACATGCAGGTACTTGCTCTGCCACCCCTGGCGGGCTGCGTGGTGTGCCTGACGACGTATTCTAATCCTACA
 +
-fffff^bcbdbdaded`ffafdcfcff]cffccccaffffffedcafaR_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:19063:1614#0/2
+GGGGG?CDCECEBEFEAGGBGEDGDGG>DGGDDDDBGGGGGGFEDBGB3@##################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 2:N:0:TCCTGA
 TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA
 +
-ccccccccccccYc_cJccccccccccccUccccc]`_YT]_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:1978:1220#0/2
+DDDDDDDDDDDD:D@D+DDDDDDDDDDDD6DDDDD>A@:5>@##########################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA
 TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT
 +
-a^N^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:5626:1554#0/2
+B?/?################################################################################################################
+@ILLUMINA-545855:49:FC61RLR:2:1:5626:1554 2:N:0:TCCTGA
 TCTAATATTATATATATCTGTGTGTGTATATATATATATACACACACACACACACATTGACATAAAAGCGAAATATAAACATTAGCAGCTGGGGCTAAAATAAAAGCAGGAAGGTT
 +
-hhhhhhhhhhhhhhhhhhhhhhgggghghhghhghghghghhhhhghhfgQeWdQd\URUY^aa[^\\K`JL\\[W``dQ`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:5879:1238#0/2
+IIIIIIIIIIIIIIIIIIIIIIHHHHIHIIHIIHIHIHIHIIIIIHIIGH2F8E2E=636:?BB<?==,A+-==<8AAE2A###################################
+@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA
 TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT
 +
-gggggggggggggggggdggffffecgfgfggggggegggggfgdgdgdadgdeaWdddedc`fdcabdaaa_]adb]_a_cbaaadbaa[`dbaaab]a`]a[Za`_`_BBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:6204:1090#0/2
+HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBB<AECBBBC>BA>B<;BA@A@######
+@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA
 TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA
 +
-gggcagggdefggggdgegdgccccc_ggdggddgdeedddfcdffffdfda]dab]______aa_edaeaaa_`]```[Z]`]ZR]\^^]]aa]^]_^P^]YXI_BBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:8044:1926#0/2
+HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@##########
+@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA
 TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT
 +
-hhhhhhhhhhghhhhhhhhhhhhhhfhhhhehhhhhfgghhhhhffdhgfgfgggffbggbffffffgfgfdfdfdfffcfadbbaffdcfaZW^aaaac`ab_YR\\Z\Y[ROYU
-@ILLUMINA-545855_0049_FC61RLR:2:1:8157:1636#0/2
+IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6
+@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA
 TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
 +
-geggggggggggggcgfggcggggggggggggggggggfffggfgggggggggggfggggg_gggegfgeggdggggggcgaedaageecgd]degadecBBBBBBBBBBBBBBBB
-@ILLUMINA-545855_0049_FC61RLR:2:1:8899:1514#0/2
+HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################
+@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA
 TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC
 +
-hfJfffhhhhhhhhhhchhhhhhfgghhghhhhhdfghghghhghhhhhhhhhhhhhhghhchhhhhdchhhchgfgehhhhhhhhhgheeagfhfaffgacaedfdfbfdhdcda
+IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB
diff -r 771ebe02636f -r b6ccc7dd7b02 tool_dependencies.xml
--- a/tool_dependencies.xml	Mon Mar 23 07:01:37 2015 -0400
+++ b/tool_dependencies.xml	Fri Dec 04 07:43:30 2015 -0500
@@ -64,4 +64,39 @@
       </install>
       <readme>primer3_core</readme>
     </package>
+    <package name="biopython" version="1.65">
+      <install version="1.0">
+	<actions>
+	  <action type="shell_command">pip install  --install-option "--prefix=$INSTALL_DIR" https://pypi.python.org/packages/source/b/biopython/biopython-1.65.tar.gz</action>
+	  <action type="set_environment">
+	    <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib/python2.7/site-packages</environment_variable>
+	    <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib64/python2.7/site-packages</environment_variable>
+	  </action>
+	</actions>
+      </install>
+      <readme>BioPython 1.65</readme>
+    </package>
+    <package name="pandaseq" version="2.8.1">
+      <install version="1.0">
+	<actions>
+	  <action type="download_by_url">https://github.com/neufeld/pandaseq/archive/v2.8.1.tar.gz</action>
+	  <action type="shell_command">./autogen.sh</action>
+	  <action type="shell_command">./configure --prefix=$INSTALL_DIR</action>
+	  <action type="shell_command">make</action>
+	  <action type="shell_command">make install</action>
+	  <action type="set_environment">
+	    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+	    <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/lib</environment_variable>
+	  </action>
+	</actions>
+      </install>
+      <readme>PANDASeq 2.8.1
+
+      PANDASEQ is a program to align Illumina reads, optionally
+      with PCR primers embedded in the sequence, and reconstruct
+      an overlapping sequence.
+
+      https://github.com/neufeld/pandaseq
+      </readme>
+    </package>
 </tool_dependency>