# HG changeset patch
# User artbio
# Date 1504523608 14400
# Node ID a8aacccd79a381f0e32afab3340e96bd29490238
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.py Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,248 @@
+#!/usr/bin/env python
+#
+"""Convert sequence read files between fasta, fastaw and tabular formats.
+
+The input format (fastq, fasta, fastaw or tabular) is detected from the
+first four lines of the input file.
+"""
+import argparse
+import logging
+import sys
+from collections import defaultdict
+
+# Characters accepted in a sequence by the format detection.
+NUCLEOTIDES = frozenset('ATGCN')
+
+
+def Parser():
+    """Parse the command line and return the argparse namespace.
+
+    The namespace exposes the ``input``, ``output`` and ``format`` options.
+    """
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument(
+        '--input', action="store", type=str, required=True,
+        help="input file, accepted formats: fastq, fasta, fastaw, tabular")
+    the_parser.add_argument(
+        '--output', action="store", type=str, required=True,
+        help="output converted file")
+    the_parser.add_argument(
+        '--format', action="store", type=str, required=True,
+        help="select output format (fasta, fastaw, tabular)")
+    return the_parser.parse_args()
+
+
+class Sequencing:
+    """Convert a sequence file while collapsing identical reads.
+
+    Instantiating the class runs the whole conversion: the input format is
+    detected, reads are counted in ``seqdic`` (sequence -> number of reads)
+    and the result is written to the output file.
+    """
+
+    def __init__(self, input, output, format):
+        """Convert the file at path ``input`` into the file at ``output``.
+
+        ``format`` is the output format: 'fasta', 'fastaw' or 'tabular'.
+        """
+        self.input = input
+        self.output = open(output, 'w')
+        self.outputformat = format
+        try:
+            self.inputformat = self.detectformat(self.input)
+            self.seqdic = defaultdict(int)
+            self.read(self.input, self.inputformat)
+            self.write(self.output, self.outputformat)
+        finally:
+            # Also reached when a step leaves through sys.exit().
+            self.output.close()
+
+    @staticmethod
+    def _reject(log_message, exit_message):
+        """Log ``log_message`` and exit with ``exit_message``."""
+        logging.info(log_message)
+        sys.exit(exit_message)
+
+    def _check_nucleotides(self, sequence, log_message, exit_message):
+        """Exit unless ``sequence`` only holds characters of NUCLEOTIDES."""
+        if set(sequence) - NUCLEOTIDES:
+            self._reject(log_message, exit_message)
+
+    def detectformat(self, input):
+        """Return the format of the file at path ``input``.
+
+        Only the first four lines are inspected. The result is one of
+        'fasta', 'fastaw', 'fastq' or 'tabular'; the program exits when the
+        file has fewer than four lines or matches none of these formats.
+        """
+        block = []
+        with open(input, 'r') as handle:
+            for _ in range(4):
+                line = handle.readline()
+                if not line:
+                    # readline() returns '' only at end of file.
+                    self._reject("File hasn't at leat four lines !",
+                                 "File hasn't at leat four lines !")
+                block.append(line.rstrip('\r\n'))
+        line1, line2, line3, line4 = block
+        if line1.startswith('>'):
+            if line3.startswith('>'):
+                logging.info("'>' detected in lines 1 and 3")
+                sequence_lines = [line2, line4]
+            elif line4.startswith('>'):
+                logging.info("'>' detected in lines 1 and 4")
+                sequence_lines = [line2, line3]
+            else:
+                logging.info("'>' detected in lines 1")
+                sequence_lines = [line2, line3, line4]
+            self._check_nucleotides(
+                ''.join(sequence_lines).upper(),
+                "But other nucleotides that A, T, G, C or N",
+                'input appears to be Fasta but with unexpected nucleotides')
+            # Inspected headers all ending in "_<integer>": weighted fasta.
+            try:
+                for line in block:
+                    if line.startswith('>'):
+                        int(line.split('_')[-1])
+            except ValueError:
+                return 'fasta'
+            return 'fastaw'
+        if line1.startswith('@') and line3.startswith('+'):
+            self._check_nucleotides(
+                line2,
+                "Looks like fastq input but other nucleotides "
+                "that A, T, G, C or N",
+                "input appears to be Fastq but with unexpected nucleotides")
+            return 'fastq'
+        # Otherwise every line must be "<sequence>TAB<integer>".
+        for line in block:
+            fields = line.split('\t')
+            valid = len(fields) == 2 and not set(fields[0]) - NUCLEOTIDES
+            if valid:
+                try:
+                    int(fields[1])
+                except ValueError:
+                    valid = False
+            if not valid:
+                self._reject("No valid format detected",
+                             'No valid format detected')
+        return 'tabular'
+
+    def read(self, input, format):
+        """Load the file at path ``input`` into ``seqdic``.
+
+        ``format`` is the value returned by detectformat(). A failure while
+        parsing is logged with its traceback and the sequences read so far
+        are kept, so the conversion stays best-effort.
+        """
+        readers = {'fasta': self.readfasta, 'fastaw': self.readfastaw,
+                   'tabular': self.readtabular, 'fastq': self.readfastq}
+        if format not in readers:
+            self._reject("no valid format detected",
+                         'No valid format detected')
+        # The context manager closes the file even if the reader fails.
+        with open(input, 'r') as handle:
+            try:
+                readers[format](handle)
+            except Exception:
+                logging.exception(
+                    "an error occured while reading %s", format)
+
+    def readfastaw(self, input):
+        """Add the records of an open weighted fasta file to ``seqdic``.
+
+        Each sequence line is counted with the weight of the last header.
+        """
+        weight = None
+        for line in input:
+            line = line.rstrip('\r\n')
+            if not line:
+                continue  # a blank line is not an empty sequence
+            if line.startswith('>'):
+                weight = int(line.split('_')[-1])
+            elif weight is None:
+                raise ValueError('sequence found before any fastaw header')
+            else:
+                self.seqdic[line] += weight
+        input.close()
+
+    def readfasta(self, input):
+        """Add the records of an open fasta file to ``seqdic``.
+
+        Sequences spread over several lines are joined before counting.
+        """
+        stringlist = None  # None until the first header has been seen
+        for line in input:
+            if line.startswith('>'):
+                if stringlist is not None:
+                    # dump the sequence of the previous record
+                    self.seqdic["".join(stringlist)] += 1
+                stringlist = []
+            elif stringlist is None:
+                # if file went through filter and contains only empty lines
+                logging.info("first line is empty.")
+            else:
+                stringlist.append(line.rstrip('\r\n'))
+        if stringlist is None:
+            logging.info("input file has not fasta sequences.")
+        else:
+            self.seqdic["".join(stringlist)] += 1  # for the last sequence
+        input.close()
+
+    def readtabular(self, input):
+        """Count the "sequence<TAB>count" lines of an open file."""
+        for line in input:
+            line = line.rstrip('\r\n')
+            if not line:
+                continue  # tolerate blank lines, e.g. at the end of file
+            fields = line.split('\t')
+            self.seqdic[fields[0]] += int(fields[1])
+        input.close()
+
+    def readfastq(self, input):
+        """Count the sequence line of each four-line fastq record."""
+        for linecount, line in enumerate(input, 1):
+            if linecount % 4 == 2:
+                self.seqdic[line.rstrip('\r\n')] += 1
+        input.close()
+
+    def write(self, output, format='fasta'):
+        """Write ``seqdic`` to the open file ``output``, then close it.
+
+        Sequences are written by decreasing number of reads. 'fasta' emits
+        one record per read, 'fastaw' one ">rank_count" record per sequence
+        and 'tabular' one "sequence<TAB>count" line per sequence. Any other
+        ``format`` exits the program with an error message.
+        """
+        ranked = sorted(self.seqdic, key=self.seqdic.get, reverse=True)
+        try:
+            if format == 'fasta':
+                headercount = 0
+                for seq in ranked:
+                    for _ in range(self.seqdic[seq]):
+                        headercount += 1
+                        output.write('>%s\n%s\n' % (headercount, seq))
+            elif format == 'fastaw':
+                for headercount, seq in enumerate(ranked, 1):
+                    output.write('>%s_%s\n%s\n' % (headercount,
+                                                   self.seqdic[seq], seq))
+            elif format == 'tabular':
+                for seq in ranked:
+                    output.write('%s\t%s\n' % (seq, self.seqdic[seq]))
+            else:
+                self._reject("unknown output format",
+                             'Unknown output format: %s' % format)
+        finally:
+            output.close()
+
+
+def main(input, output, format):
+    """Convert the file ``input`` into ``output`` written as ``format``."""
+    Sequencing(input, output, format)
+
+
+if __name__ == "__main__":
+    args = Parser()
+    log = logging.getLogger(__name__)
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+    main(args.input, args.output, args.format)
diff -r 000000000000 -r a8aacccd79a3 sequence_format_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sequence_format_converter.xml Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,169 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+The tool performs all pairwise conversions between sequence formats fasta, fastaw and tabular.
+
+The tool can also convert fastq format into any of the formats fasta, fastaw and tabular.
+
+The format of the input is automatically detected by the tool.
+
+**Formats**
+
+*Fasta*
+
+>id1
+
+ATGCATGACCAGATAGGAC
+
+>id2
+
+ATGCATGACCAGATAGGAC
+
+Note that the tool handles fasta sequences that span multiple lines.
+
+
+----------
+
+*Fastaw*
+
+Reduces the size of a fasta file of sequence reads by writing each distinct sequence only once:
+
+>id1_n1
+
+ATGCATGACCAGATAGGAC
+
+>id2_n2
+
+ATGCATGACCAGATAGGAC
+
+etc...
+
+Here n1 and n2 are integers that indicate the number of reads of the sequence found in the sequencing dataset.
+
+Note that if two fastaw files are merged (e.g. by concatenation), the read counts in the headers are no longer correct.
+
+These values can be recomputed by converting the merged file to fastaw with the *sequence_format_converter* tool.
+
+
+----------
+
+*Tabular*
+
+A tabular version of fastaw, without fasta headers:
+
+column 1 column2
+
+ATGCATGACCAGATAGGAC n1
+
+ATGCATGACCAGATAGGAC n2
+
+
+----------
+
+*Fastq*
+
+@HWI-1
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+BBBA;ACB9ABCBABB@@/
+
+@HWI-2
+
+ATGCATGACCAGATAGGAC
+
+\+
+
+?03@?82?B>C@B>@CC?0
+
+
+
+
+
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTofasta.sorted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofasta.sorted.fa Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTofastaw.sorted.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTofastaw.sorted.faw Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,110 @@
+>10_3
+>11_3
+>12_2
+>13_2
+>14_2
+>15_2
+>16_2
+>17_2
+>18_2
+>19_2
+>1_86
+>20_2
+>21_2
+>22_2
+>23_1
+>24_1
+>25_1
+>26_1
+>27_1
+>28_1
+>29_1
+>2_43
+>30_1
+>31_1
+>32_1
+>33_1
+>34_1
+>35_1
+>36_1
+>37_1
+>38_1
+>39_1
+>3_16
+>40_1
+>41_1
+>42_1
+>43_1
+>44_1
+>45_1
+>46_1
+>47_1
+>48_1
+>49_1
+>4_10
+>50_1
+>51_1
+>52_1
+>53_1
+>54_1
+>55_1
+>5_9
+>6_8
+>7_7
+>8_6
+>9_4
+AATGGCACTGGAAGAATTCACGG
+AATGGCACTGGAAGAATTCACGGG
+AATGGCACTGGAAGAATTCACGGGT
+AATGGCACTGGAAGAATTCACGTG
+AATTGCACTAGTCCCGGCCTG
+ACTGAATTCTCGTGGGTCTGCAT
+AGGACGGGAAGGTGTCAACG
+ATAAAGCTAGATTACCAAAGCAT
+CAAATTCGGTTCTAGAGAGGTT
+CGAATAGCGTTGTGACTGA
+CGGACGGTATATGGGTTAATATT
+CGGATGATGGTTCACAACGACC
+CGGCACATGTTGAAGTACACTCA
+CGGCACATGTTGAAGTACACTCAA
+CTGACTAGATCCACACTCATTA
+GGACGGAGAACTGATAAGGGCTCGG
+GGCGAACATGGATCTAGTGCACG
+GGGAGCGAGACGGGGACTCAC
+GGGAGCGAGACGGGGACTCACT
+TAAAGCTAGATTACCAAAGCAT
+TAAGGAAATAGTAGCCGTGAT
+TAGCACCACATGATTCGGCT
+TAGGAACTTCATACCGTGCTCT
+TATCACAGCCAGCTTTGAGGAG
+TATCACAGCCATTTTGACGAGTT
+TATTGCACTTGAGACGGCCTTA
+TCAGGTACCTGAAGTAGCG
+TCAGGTACCTGAAGTAGCGCGCG
+TCAGGTACCTGAAGTAGCGCGCGTTAT
+TCTTTGGTATTCTAGCTGTAGA
+TGACTAGATCCACACTCATTA
+TGACTAGATCCACACTCATTAA
+TGACTAGATCCACACTCATTAC
+TGACTAGATTCACACTCATTA
+TGGAATGTAAAGAAGAATGGAG
+TGGAATGTAAAGAAGTATGG
+TGGAATGTAAAGAAGTATGGA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAGA
+TGGAATGTAAAGAAGTATGGAGT
+TGGAATGTAAAGAATTATGGAG
+TGGAATGTAAAGGAGTATGGAG
+TGGACGGAGAACTGATAAGG
+TGGACGGAGAACTGATAAGGG
+TGGACGGAGAACTGATAAGGGC
+TGGACGGAGAACTGATAAGGGCA
+TGGACGGAGAACTGATAAGGGCAA
+TGGACGGAGAACTGATAAGGGCAAA
+TGGACGGAGAACTGATAAGGGCAT
+TGGACGGAGAACTGATAAGGGCT
+TGGACGGAGAACTGATAAGGGCTT
+TGGACGGAGAACTGATAAGGGT
+TGTGATGTGACGTAGTGGAA
+TGTGATGTGACGTAGTGGAAA
+TGTGATGTGACGTAGTGGAAC
diff -r 000000000000 -r a8aacccd79a3 test-data/fastqTotabular.sorted.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqTotabular.sorted.tab Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,55 @@
+AATGGCACTGGAAGAATTCACGG 2
+AATGGCACTGGAAGAATTCACGGG 4
+AATGGCACTGGAAGAATTCACGGGT 1
+AATGGCACTGGAAGAATTCACGTG 1
+AATTGCACTAGTCCCGGCCTG 1
+ACTGAATTCTCGTGGGTCTGCAT 1
+AGGACGGGAAGGTGTCAACG 1
+ATAAAGCTAGATTACCAAAGCAT 1
+CAAATTCGGTTCTAGAGAGGTT 1
+CGAATAGCGTTGTGACTGA 1
+CGGACGGTATATGGGTTAATATT 1
+CGGATGATGGTTCACAACGACC 1
+CGGCACATGTTGAAGTACACTCA 2
+CGGCACATGTTGAAGTACACTCAA 2
+CTGACTAGATCCACACTCATTA 1
+GGACGGAGAACTGATAAGGGCTCGG 1
+GGCGAACATGGATCTAGTGCACG 1
+GGGAGCGAGACGGGGACTCAC 1
+GGGAGCGAGACGGGGACTCACT 2
+TAAAGCTAGATTACCAAAGCAT 3
+TAAGGAAATAGTAGCCGTGAT 3
+TAGCACCACATGATTCGGCT 1
+TAGGAACTTCATACCGTGCTCT 2
+TATCACAGCCAGCTTTGAGGAG 1
+TATCACAGCCATTTTGACGAGTT 2
+TATTGCACTTGAGACGGCCTTA 1
+TCAGGTACCTGAAGTAGCG 1
+TCAGGTACCTGAAGTAGCGCGCG 10
+TCAGGTACCTGAAGTAGCGCGCGTTAT 1
+TCTTTGGTATTCTAGCTGTAGA 1
+TGACTAGATCCACACTCATTA 1
+TGACTAGATCCACACTCATTAA 2
+TGACTAGATCCACACTCATTAC 1
+TGACTAGATTCACACTCATTA 1
+TGGAATGTAAAGAAGAATGGAG 1
+TGGAATGTAAAGAAGTATGG 2
+TGGAATGTAAAGAAGTATGGA 7
+TGGAATGTAAAGAAGTATGGAG 43
+TGGAATGTAAAGAAGTATGGAGA 2
+TGGAATGTAAAGAAGTATGGAGT 1
+TGGAATGTAAAGAATTATGGAG 1
+TGGAATGTAAAGGAGTATGGAG 1
+TGGACGGAGAACTGATAAGG 2
+TGGACGGAGAACTGATAAGGG 6
+TGGACGGAGAACTGATAAGGGC 86
+TGGACGGAGAACTGATAAGGGCA 8
+TGGACGGAGAACTGATAAGGGCAA 9
+TGGACGGAGAACTGATAAGGGCAAA 1
+TGGACGGAGAACTGATAAGGGCAT 1
+TGGACGGAGAACTGATAAGGGCT 1
+TGGACGGAGAACTGATAAGGGCTT 1
+TGGACGGAGAACTGATAAGGGT 1
+TGTGATGTGACGTAGTGGAA 16
+TGTGATGTGACGTAGTGGAAA 2
+TGTGATGTGACGTAGTGGAAC 1
diff -r 000000000000 -r a8aacccd79a3 test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+TAGTTACCTTCATATCTCTCTTTA
+>2
+TCTATTCATACAAAACACTAATACCC
+>3
+ACAACCTCAACTCATATTT
+>4
+TATAATTTTATTTTATATTTTCTCT
+>5
+TCTTCTATATAATCCTTTATTATAA
+>6
+TAAAATAAACCAAAACCCAAAAAT
+>7
+AATCTACAATTTCCATTACGACTCC
+>8
+TTTCCGACAAATACACCATCTTC
+>9
+CAGATTCACTGATTTTCTTACGCC
+>10
+CAAGAATACAAAAAATACTAATTA
+>11
+AACTCTCTAATTTAACTTTGTGC
+>12
+AAAAACACACAGACACAAGCAGCAAT
+>13
+TTACTAGATCCACCCTCATTA
+>14
+ATGTTATTTACTTTTTCCCCTTATA
+>15
+CGATATTTTCTCCTCTTACC
+>16
+TACAGAGAAATATACAACACTCACC
+>17
+ATGAAATTCGAACAATACGTC
+>18
+GAGAATAAATATTTCAATGGTCTATTG
+>19
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>20
+CATACCTTAAATTATCTCTTTCTT
+>21
+TTTGTTTTTCATTTTTTTATCTTT
+>22
+TTATTATCTATTTTAATTTTTCTTAA
+>23
+TATTATCATCTCGTTCTTCCTTCTC
+>24
+TCAACTGGCAAGAATTTTTGAAAATT
+>25
+TACAAATTTTTTTTTCTTTCTTAT
+>26
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>27
+AAGATGGAGTAGTTTTTT
+>28
+TCTCATCTTACAATTTTTTAAAACTT
+>29
+CATACCTACAAAAAAGCTTCTCTTAC
+>30
+TTTTATATTTCCTTATATCTTTACTA
+>31
+GGAGATTGTAGAACGAAAGGAAAAT
+>32
+TCTATTTCTTTATTTTTTTTATTAT
+>33
+CGGACGGTATATTTTTTAATATAA
+>34
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>35
+TACTATATACTTCTTCAAATCACA
+>36
+ATTTTACTTCATCATTTTC
+>37
+TATTTCCAACCTTCAACCTCAAATAA
+>38
+CACGACTTTATTCTTTTTATCTCA
+>39
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>40
+TTTATTACAACCCTATCTTACCTCAA
+>41
+CGATATTTTCTCCTCGTACC
+>42
+TAATTACCATTGCTAACTATCCA
+>43
+CATTAATTCATCCATTTAAACTAA
+>44
+GAAACAAACAACACATACCCTCTGGC
+>45
+TACTTTTTTCTTAATTTTTTATTAAAC
+>46
+TAACTTTAACTTTTTTACT
+>47
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>48
+TCTAGTCTGAGCGTAGTACCAGATTG
+>49
+TTTTTTATCAATTTTCACCATTCAT
+>50
+AATGACACACTCTTCATCAAC
+>51
+TAACATAAATTTTAATCATAAATTG
+>52
+TCTATATTATTTTTATCAATTTTCACC
+>53
+TCCCAACCCTCGAGCATCATTTTC
+>54
+TAGTCATACATACCTAATTATACATA
+>55
+TACAAAAAATGCGAAAATTGACCCT
+>56
+GAGAACTTTTAATCATTTTAC
+>57
+TCTTATTTTAATCTTCCAATTTC
+>58
+CGGCACATGTTGAATTACACTCA
+>59
+CAGTTTCACAAAAGATCTTTTAA
+>60
+GCCAACGACCATACCACGA
+>61
+CAAATAACAAACTGAATAAACGAAA
+>62
+TAGTTTCACTACTTTATTCTTTTTA
+>63
+TGAGGAAAACAGAAAAATGAGAGACA
+>64
+TATATAAATCTTCAACATCAA
+>65
+TGATTTACTTACATTCTTTTTTT
+>66
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>67
+TACTGAAAACGGGCGCATATCAGTGG
+>68
+TATTCAATCACTCCATTATATATAACA
+>69
+TATATTGCCTCCCCATAATCCTT
+>70
+TCGCCGTAAAGCCAGTCGTTCTCC
+>71
+TTTAAACACTTCCTACATCAAATTTC
+>72
+TTTGTTTTTTACTATATTT
+>73
+TCTTTTTTTTAATACTTATTTTCATT
+>74
+TAAGTTTTTAATCATTTTTTTT
+>75
+TGAGAATGACTTCTTCACGATCTCTT
+>76
+AATTTATTTAATTTATATTCTAACTAA
+>77
+AAAAAATATCTTTTTTAACTCGTGGCC
+>78
+AAGACAACAATGACATATAAGACG
+>79
+TGAGCTAGAACTGCACCCACTCCA
+>80
+ACTAAACTTTTCTTACCATATTTCTA
+>81
+TATTTCACTTTATACTTCCTTAA
+>82
+TATCTATCTTTGATCTTCTTTTCA
+>83
+TTTTTATCAATTTTCACCATTC
+>84
+TAAAAATAATTGTCTTTAATTTCA
+>85
+TTAAAGACGCAACAACTAACATT
+>86
+TAACCTTGCAGAACTATACGATTCAAA
+>87
+TACTATTTTATTATACATACATACATTA
+>88
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>89
+AATGTCACTTGAAGAATTCACGT
+>90
+TAATAAGAAACTGTTCAAACAATCCAC
+>91
+AAGCACGCCTTACCACAATTTATAA
+>92
+CCATATATGACTGACTCATTTCAC
+>93
+TTAATATGTAATTTCATACCTCAC
+>94
+AGGATTTTTAAGCCCATATGTTTCC
+>95
+ACAGCAGGACGGTGATCA
+>96
+TGATGACGGGCAGCAGGGATTTTC
+>97
+TTGTAAAACATTCTTTCTCCTGAC
+>98
+TTCTTCCTTTTATCCTCTCTTAA
+>99
+ATTATTAATAAATTATTATAA
+>100
+CTGGAAACTATTGATCAAATT
+>101
+TACAACTAACATCCTTTCTTCTTCC
+>102
+TCAAATGCAAATTGGATTTATGA
+>103
+TCCTGAGGACGAGGGGCGTTTAGC
+>104
+TACACAGACTTACAAAACACATCCTTC
+>105
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>106
+TTAAACAATTTGGAATTAATT
+>107
+TAGCCTTTACTAGGCTTTTTCTAA
+>108
+CGATATTTTCTCCTCTTACCT
+>109
+TGGAATGTAAAGAAGTATGGAG
+>110
+TTTTTAACTCCCATCATTTTTCCTC
+>111
+TTTTTTATCATTTTTCACCTAAAAAA
+>112
+GAACAATTTTTCAATTTTTTACATTA
+>113
+TATGGATTATTTCAAAATTTTTTTTT
+>114
+TAACGGAGCACGAGAACGAAGTGG
+>115
+CAATTTTTAATTCCTTTTTTCTTCTT
+>116
+AGATGTTGATCTAAACTCTCCCA
+>117
+TGAGCGGAGAACCAGAGTTGATGAGC
+>118
+TAACAAATAGAACGTTCTAATTTAAA
+>119
+CTAGACAAGATGCTATAAATTTTAAA
+>120
+TTTTCTTTACCCATCTTTACTTTCCC
+>121
+TACACACTCATCAACCAAAGGACG
+>122
+TCTTATTATCATTTTTTTATCCCTT
+>123
+TCAGAGTTCTACAGGTCCTACGATT
+>124
+TTTATTACTTAGTCATAATTCCAA
+>125
+GCCGGGGCGTGAGATGTCTGCATTA
+>126
+GACGAAACGCAACAACAAAATGGACG
+>127
+TAGACTTTCTACTCATTATTAC
+>128
+AAATTGCAAAGATGGAAAATAAAACT
+>129
+CCTTACTCAACATACTTAATCATACTTA
+>130
+TGCTTGGACTACATATGGTTGAGTG
+>131
+GAATGATCGCACCACCACCTCAACGTT
+>132
+TCCACCTATTTATCTTTTCTT
+>133
+TGGACAAGAACCACGCGACGGGTGT
+>134
+CAAGATATGAACAAAGCAAAGACAC
+>135
+CAAACGGAACAAGACATCACCATC
+>136
+NATTCTTACTCCATTTCAATTTACT
+>137
+TAGAACTCGAACCAGAGCTCC
+>138
+CGGAAAAGAATGTAGACCATTTAA
+>139
+TACAAACGGAACTTTCGTCATAA
+>140
+GGTATCTTTATATTTTAATTTTCTT
+>141
+TATTCCGACAATACCTTCTTTAC
+>142
+AACTTTAAATTTTTAATAACCTT
+>143
+CATAATATAAACTTATCTT
+>144
+TATTTATAAATTTTTTCTTGAGAC
+>145
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>146
+TATTATACATAGAATAACAAATCTTT
+>147
+TGGAGTAGCACAGTCGTCTGAAATC
+>148
+TATTTCTTTTTTAACTTCTTTTC
+>149
+TTATAATCACGGCACCCTATACA
+>150
+TTCTTTTACTACATATTTTTTATTTTT
+>151
+TAGCGAGATGGACCAACGTGCTGT
+>152
+CCAGAAAACAATACAACATCCTCA
+>153
+TCCGAAAACAAGGCCCGTCGCT
+>154
+TACTCAATAGAACTCTACTCACTCATA
+>155
+AACGGGGAATAAGGGTTCG
+>156
+TCAGTCTTTTTTTCTCTCCTA
+>157
+AATATAAAAATACAATCAACCATTGCA
+>158
+GGACGGAGAACTGATAAGGGCA
+>159
+TAAAGAAGAAGAATTGATTTTAAT
+>160
+TCATTACACTTCTTACAAAAC
+>161
+CCGCGATCTGCTTATTTATAATCTT
+>162
+TCTAAACACCCACGAAAATCTCTTAC
+>163
+AACAGGAAAAACAGAAGGATTTCTA
+>164
+TCTCTTTTATTTTTATCTTTCCTT
+>165
+AACATTTTATCAATTATACATTA
+>166
+GCAGATAGAAATCAATACAAAAATC
+>167
+TTAATGACACACGGGAAAAACACCG
+>168
+TACAGACAACACATACGGACTTAAGT
+>169
+TCCACAACAACTCTATCTAAAGCATT
+>170
+ATAAAGCTAGATTACCAAAGCAT
+>171
+TACCTCTTTATTAACCTCCACCTCTA
+>172
+TACACCTCTTTTTACTTTTTTATT
+>173
+CACCGAACCGGGAAGGCGAACAAC
+>174
+TAGGTACTTACCTTTTTTTTACACAA
+>175
+AGGAATATGATGAAATAAAAAAAT
+>176
+TATTATTTTTTTATTCCATTCATAT
+>177
+TAAAATAAATAAGTCCGACGACAA
+>178
+TCTGTATTTGACTTATTACTTTCTCC
+>179
+AAGGAATTAAAGCAATAATTCTAA
+>180
+TTCATTTTATTTTTAAATATCTTTTTT
+>181
+TTAGTATTAATCTTCACTTAA
+>182
+TATGGAGAAACAGCGATATAAGTCA
+>183
+CCCGAAAAGCCGAGGACGACTTA
+>184
+CACAGACTGAGGCAGAAAAAACAA
+>185
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>186
+TAAGAAACTGAGCTAACGCAATGTACC
+>187
+CGATATTTTCTCCTCTGACC
+>188
+TAACTACTTTTACATTAATACTAA
+>189
+ACCAGCACCTTCCGACTCAACGTCAAA
+>190
+TCAAAGAACAATGTAAAGCCGCGAC
+>191
+TTGATTCTTCTTTTTCACAAAA
+>192
+TACAAAACAAACAAATTACAATCTAAA
+>193
+GAAACCATTATCTTATCTTTATACA
+>194
+CTATATTTTCTCTCTTACC
+>195
+TCAGAAGAACAGAGAATTGATTTT
+>196
+TAATAAATTATTAAATAAAAAAAAAA
+>197
+CCGACCGAGCAAATAAACACAGGAACG
+>198
+AAGTGAAGAAGTAGTTTTT
+>199
+TGGACGGAGAACTGATAAGGGC
+>200
+TGCTTGGACTACATATGGTTGAGGG
+>201
+TACAAGACTAAAACAAACGTGAAGT
+>202
+TGAAACTGAAACTAACATACAAAATATT
+>203
+TATCTGATCAACAATCTTTTCCCAT
+>204
+TTTAAGACTTATGAGCTTG
+>205
+CAAGGCTCAGAAGAACATCACCAAGACC
+>206
+TTCAAGTAGATTGCATTTTTTAATA
+>207
+CGCAACCAGCAGCAACTCCTAGCAT
+>208
+TACAAACGGAACTTTCTTCATAACTTC
+>209
+ACAAATCATAAATTTTTTTTTACT
+>210
+TCCGAAAAATCGTAGGACCCGGGCA
+>211
+CGCCGCAAGATGAATACTCTAATGA
+>212
+TGACCAAAGACAAACAAACAATAAATA
+>213
+AAAAGGAAAAACAGAAAAATTGGG
+>214
+TTCTTGATAACGCATCTTCTACAT
+>215
+TACTGAAACAAGGAAACACAAGC
+>216
+TCAAAAAGTAATAGGGATCGTTA
+>217
+TGGAATGTAAAGAAGTATGGAG
+>218
+TAAAATTGTAATATTTAAATAATAT
+>219
+GAGGATTAAAAGAACGGTTTATAA
+>220
+TTCTTTGACTACATATTTTTTATT
+>221
+TACAAACGTAATTTTCGCATAACATC
+>222
+AGAACAATTAAATAAAATAGCATA
+>223
+TAATAATTTAAATAAATATAAATTT
+>224
+AATTGCAACAGAGACTGGAA
+>225
+TTAAGTTTTAGACATAATCTATTACAA
+>226
+TGAAAGGAAAAACAGGACACGGGA
+>227
+AACAGGGAGATCAACAGCGTTGACA
+>228
+GGACGGAGAACTGATAATGGC
+>229
+TGTGTAATCTTTCTACTTCTTCTAC
+>230
+TTTACCAGAGGAGTCGAGTTTTT
+>231
+AAAATCGACTGCCGAAAACATTTTAA
+>232
+TGCTTGGACTACATATGGTTGAGTG
+>233
+TACAGACAACACATACGGACTTAA
+>234
+GAAGAGGAGGAGGAGTTTGTAAG
+>235
+TTACAATCTACTATTCTTTTATTA
+>236
+TTATTACATCGTCCACATATAACAAAA
+>237
+TTTTTAACTCATTTTACAATTAAAC
+>238
+AAAACCCGGACAAACCATCGGAGGA
+>239
+CAACACATGACGCGACAATTCTTG
+>240
+TACACACTCATCAACCAAAGTACGTA
+>241
+TTTTCATTTCTTCTTCAAATCCTTT
+>242
+AACAACTGCAAACATCTACCACA
+>243
+TAGTTTTAAATATTTCTTTTTTTC
+>244
+AAAGACAAAAGAAATACAGGCACT
+>245
+TTTGATACCTTTATACCATACCTATT
+>246
+ACAGCAGGACGGTGATCA
+>247
+TTCTAGCACAACACGCACACATATA
+>248
+TAATCAAAAAACTCTTCATTTTTA
+>249
+TAGGAACTTCATACCGGTCTC
+>250
+TATTTCAGCAACAGACTAAGACTAA
diff -r 000000000000 -r a8aacccd79a3 test-data/input.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fastqsanger Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,1000 @@
+@HWI-49
+TAAAGCTAGATTACCAAAGCAT
++
+BBBA;ACB9ABCBABB@@/?BB
+@HWI-119
+TGGACGGAGAACTGATAAGGGC
++
+?03@?82?B>C@B>@CC?032<
+@HWI-127
+TGGACGGAGAACTGATAAGGGC
++
+A>9@>;9>:=@3A7@BB#####
+@HWI-135
+TGGACGGAGAACTGATAAGGGC
++
+BB6BA5AC+BBBCC6@B>C<'<
+@HWI-136
+TGGACGGAGAACTGATAAGGGC
++
+B22>A=?;7:;BB>=C@>)1(:
+@HWI-166
+CGGCACATGTTGAAGTACACTCAA
++
+B<2>BCBCACC@C@3@BBCBCCCB
+@HWI-187
+TGGACGGAGAACTGATAAGGGC
++
+B;>@B82:4;B?B?@BB=5:2<
+@HWI-199
+GGGAGCGAGACGGGGACTCAC
++
+@?@@8?6;@AB=72;;=
+@HWI-36
+ATAAAGCTAGATTACCAAAGCAT
++
+BBBB?AAAA9AABA>7@@:-9AB
+@HWI-39
+AATGGCACTGGAAGAATTCACGG
++
+54=4440)@0365:83:<;5###
+@HWI-227
+TGGAATGTAAAGAAGTATGGAG
++
+BB>ACCBCCC@9@A4>CA66@>
+@HWI-249
+TGGAATGTAAAGAAGTATGGAG
++
+A9>?BA2@AA>5<<4A?:BB:BBA?9<;4?B>3-78
+@HWI-254
+TGGAATGTAAAGAAGTATGGAG
++
+B>A=B@1>B?47<6=A??BB;@<@BABB@CC==:0?
+@HWI-332
+TGGACGGAGAACTGATAAGGGC
++
+B?8@A>B;BAB@BA?CB@A731
+@HWI-211
+TGGAATGTAAAGAAGTATGGAGA
++
+A61=BA;7B@BBA:9:B?;;@2=
+@HWI-333
+TGGAATGTAAAGAAGTATGGA
++
+BA>>AB8@BB?=>@98BBA>#
+@HWI-355
+TGGAATGTAAAGAAGTATGGAG
++
+B>8BBA7@CBA;>A2?BB2.>8
+@HWI-282
+TGGAATGTAAAGAATTATGGAG
++
+B>8>@A7@8=>>997-8A72##
+@HWI-378
+TGGAATGTAAAGAAGTATGGAG
++
+B7=;AA8=A@@5=;4>BB31:2
+@HWI-372
+AATGGCACTGGAAGAATTCACGGG
++
+BBA;@;>A@;@=?@:?B@######
+@HWI-388
+TGGAATGTAAAGAAGTATGGAG
++
+BA@?BB?BBB??A?0:BA53<>
+@HWI-399
+TGGAATGTAAAGAAGTATGGAG
++
+A;*>?B9@B<+1<@<4B@7)@-
+@HWI-109
+TGGACGGAGAACTGATAAGGGC
++
+BC@CBCC@CBB9@8?CCC>AA;
+@HWI-421
+TGGACGGAGAACTGATAAGGGC
++
+A66?A;0<@;B?A;?BB>38-7
+@HWI-412
+TGTGATGTGACGTAGTGGAA
++
+BA=?>B<>9><09?0=656=
+@HWI-432
+TGGACGGAGAACTGATAAGGGC
++
+A;;B>=;=2@?0@@>?CBA@2=
+@HWI-442
+TGGACGGAGAACTGATAAGGGC
++
+BBA?=;;>B6??@07AA#####
+@HWI-470
+TGGAATGTAAAGAAGTATGGAG
++
+B?3>AC>:AA?AA<)7BA;7=:
+@HWI-499
+TGGACGGAGAACTGATAAGGGC
++
+BA>??=:;B=B8A>>B9=/55=
+@HWI-100
+TGGACGGAGAACTGATAAGGGC
++
+A;6@@96@;?A@BBABC?3<5>
+@HWI-514
+TGACTAGATCCACACTCATTAA
++
+BAABC?7@C@ABABBBBABBBB
+@HWI-521
+TGGACGGAGAACTGATAAGGGC
++
+A2=A5=2=>@B;C>:7A>>:1.
+@HWI-545
+TGGACGGAGAACTGATAAGGGC
++
+BA?BBBAABAB@B>?BB@>A><
+@HWI-549
+TGGACGGAGAACTGATAAGGGC
++
+A66<;;0<>:@4A1<>>#####
+@HWI-431
+TATCACAGCCAGCTTTGAGGAG
++
+B>B?B?@A?69?38<@5902;9
+@HWI-579
+TGGAATGTAAAGAAGTATGGA
++
+BBB@A@BB>B=1>>BA###
+@HWI-583
+TGGACGGAGAACTGATAAGGGC
++
+BB:@BCB>>BBAB@>@BA8;00
+@HWI-587
+TAGCACCACATGATTCGGCT
++
+BA;/A;;A1BA:@BB>=4;?
+@HWI-516
+TGGACGGAGAACTGATAAGGGCA
++
+B??>>AA>A9A3A69B?;64779
+@HWI-113
+TGGACGGAGAACTGATAAGGGCA
++
+BBC@@;>BCBCBB*BCC@95**C
+@HWI-592
+TGGAATGTAAAGAAGTATGGAG
++
+BBBBBB?ABA;9@@2=B@)1=4
+@HWI-602
+TGGACGGAGAACTGATAAGGGC
++
+BA;>A@>=8=@?BB>B?:0906
+@HWI-608
+TGGACGGAGAACTGATAAGGGC
++
+BBC>CC9>??CAA9=B@=::36
+@HWI-590
+GGGAGCGAGACGGGGACTCACT
++
+=B=AAAA?CBAB@=A95=%:0;
+@HWI-612
+CGGCACATGTTGAAGTACACTCA
++
+B?>BBBCBABCCA=7@AAABBAA
+@HWI-610
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB@;@2>;>?66>963#######
+@HWI-629
+TGGACGGAGAACTGATAAGGGCA
++
+A68@?;6<0C39>;B4B@CB73@@;<(>BB
+@HWI-648
+TGGACGGAGAACTGATAAGGGC
++
+A8=A?@9=275<
+@HWI-377
+TGGACGGAGAACTGATAAGGGCAA
++
+B>5>4?A?A;>0:9<=0;;:0.88
+@HWI-658
+TGGAATGTAAAGAAGTATGGAG
++
+B>8>AB9@B?AA>=;>AA82<8
+@HWI-677
+TGGAATGTAAAGAAGTATGG
++
+A:0)2=AB<4
+@HWI-684
+TGGAATGTAAAGAAGTATGGAG
++
+BAA?BBA@BA==?>8@B?02:)
+@HWI-699
+TGGACGGAGAACTGATAAGGGC
++
+B??B@>;<;>B?BB?AB>####
+@HWI-707
+TGGAATGTAAAGAAGAATGGAG
++
+BBAB9*===@7;;:(.,B??@;
+@HWI-710
+TGGACGGAGAACTGATAAGGGC
++
+BA?AA?8>A=B?BA>AA?####
+@HWI-700
+TGGACGGAGAACTGATAAGGGC
++
+BCB>4BB@8=CB>130-
+@HWI-780
+TGGACGGAGAACTGATAAGGGC
++
+BBAABCAA>0/29
+@HWI-766
+TGGACGGAGAACTGATAAGGGCAA
++
+A;3=C@->@B*B>@A=AB@A<
+@HWI-815
+TGGAATGTAAAGAAGTATGGAG
++
+B915AB;:?=>7<>9?CA55@*
+@HWI-839
+TGGACGGAGAACTGATAAGGGC
++
+A6;?A;;>A?@BB8=CB;583?
+@HWI-884
+TGGAATGTAAAGAAGTATGGAG
++
+BBBABBBBBB?BB>1/@BB?BB
+@HWI-897
+TGGAATGTAAAGAAGTATGGA
++
+BAA?BB?BBB>6@@=>BB70?
+@HWI-671
+TGACTAGATCCACACTCATTAC
++
+B@BBBA;ACBBCBBBBA>ABBB
+@HWI-901
+TGGACGGAGAACTGATAAGGGC
++
+B;7@@>;=@<@?A:>CB@;8:@
+@HWI-914
+TGGACGGAGAACTGATAAGGGC
++
+BBCCBCC:BBB@CCACBB<>AB
+@HWI-926
+TGGAATGTAAAGAAGTATGGAG
++
+BC=AAB;BBAB@B@+7BAA7BA
+@HWI-937
+TGGACGGAGAACTGATAAGGGC
++
+BBBBA4@>=;BAB=ACC@*8;<
+@HWI-973
+TGGAATGTAAAGAAGTATGGAG
++
+A59>BB;@BB?>>=3;BA71:5
+@HWI-242
+TGGACGGAGAACTGATAAGGGCTT
++
+A31@<05=;>@1B>>AA@:;59A9>
+@HWI-1008
+TGGACGGAGAACTGATAAGGGC
++
+A59=B=5=8@@>CA<9@=*,8>
+@HWI-568
+TGGACGGAGAACTGATAAGGGCAA
++
+A88@BA;>?@A?BA0###
+@HWI-911
+CTGACTAGATCCACACTCATTA
++
+BAC
+@HWI-1021
+TGGACGGAGAACTGATAAGGGC
++
+B79>?B;>:?B;B@<>66-7
+@HWI-1020
+TGGACGGAGAACTGATAAGGGC
++
+B;>?@>9>A=A=B;ACB>8;==
+@HWI-1022
+GGGAGCGAGACGGGGACTCACT
++
+AA@AC=B=79ABCBA1@B236A
+@HWI-1026
+TGGACGGAGAACTGATAAGGGCAAA
++
+B>?B>=AAB@?4B>>BBAAA>?A?A
+@HWI-1033
+TGGAATGTAAAGAAGTATGG
++
+?43>AA;@A=>89?>=CB;>>;BBACB@CC@3:+?
+@HWI-1059
+TGGACGGAGAACTGATAAGGGC
++
+BA?AABA@BBBABBBBBA980:
+@HWI-1060
+TGGACGGAGAACTGATAAGGGC
++
+B?>=BCBA;BA<56<=
+@HWI-1080
+TGGACGGAGAACTGATAAGGGC
++
+B@?@BCA@CA>ABA4.<7
+@HWI-1090
+CGGATGATGGTTCACAACGACC
++
+A;59A7?AA3AB>@B@B:2@@>;>>=BA>@>##
+@HWI-1094
+TGGAATGTAAAGAAGTATGGAG
++
+BB:2@A:A@BA6A@=4B;5';;
+@HWI-1115
+AATGGCACTGGAAGAATTCACGGG
++
+BCBAB@B@>><>@;;A<;>#####
+@HWI-1120
+TGGAATGTAAAGAAGTATGGAG
++
+B67@B?2?BBA=A@09BA47?/
+@HWI-1122
+TGGACGGAGAACTGATAAGGGC
++
+B@5>;0;;B==3AB>BA=6:29
+@HWI-1127
+TGGACGGAGAACTGATAAGGGC
++
+A8BA>1###
+@HWI-1128
+TGGAATGTAAAGAAGTATGGAG
++
+B??>BBC>>A9=@<5:B@36>A
+@HWI-1119
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB?>??A>B=(6?7#########
+@HWI-1149
+TGGAATGTAAAGAAGTATGGAG
++
+BAA>@BB@B?<8?B>??BAB;;BB?2###
+@HWI-1159
+TGGACGGAGAACTGATAAGGGC
++
+BBBCBA<@;;BBBA@B@>36)?
+@HWI-1175
+TGGACGGAGAACTGATAAGGG
++
+A0)>B@69;4>@BB>BAA=A:
+@HWI-1174
+TGGACGGAGAACTGATAAGGGCAA
++
+B?>@A6@;@>@:?0:C@:######
+@HWI-1181
+TGGACGGAGAACTGATAAGGGC
++
+BCACC?BAABB?CBACCCAA?B
+@HWI-1196
+TGGACGGAGAACTGATAAGGGC
++
+B>ABBC=>;=BAABB?BB@<81
+@HWI-1197
+TGGACGGAGAACTGATAAGGGCA
++
+BBBB>?A>;@B;BB@B@909879
+@HWI-1201
+TGACTAGATCCACACTCATTAA
++
+A>CB@A?BBBBCBB>BB@CBA?
+@HWI-1206
+TGGACGGAGAACTGATAAGGGC
++
+B?ACBCC@BBBB@0?CB@;2;<
+@HWI-985
+AATGGCACTGGAAGAATTCACGGG
++
+BB@6@A>@:.:>@96A########
+@HWI-1236
+TGGACGGAGAACTGATAAGG
++
+BA?AABBB>>BB@..
+@HWI-1245
+TGGAATGTAAAGAAGTATGGAGA
++
+BCBBBCB<=:?78/5ABBAAB9B
+@HWI-1266
+GGCGAACATGGATCTAGTGCACG
++
+?ABAABCB@=@?A?=:0>3=>##
+@HWI-1228
+TGGACGGAGAACTGATAAGGG
++
+<@:B@==?@>A@A>;BB?)06
+@HWI-1275
+TAAAGCTAGATTACCAAAGCAT
++
+BBBBBACA:BCBCBBBB@7;BB
+@HWI-1279
+TGGAATGTAAAGAAGTATGGAG
++
+BBBBABBABA@8=ABAAB>4>2
+@HWI-1282
+TGGAATGTAAAGAAGTATGGAG
++
+B?8A@-5BB@:>=
+@HWI-1312
+TGGACGGAGAACTGATAAGGGC
++
+B:2
+@HWI-1317
+TGTGATGTGACGTAGTGGAA
++
+BB?B?BB?@<:0<;9>=<<:
+@HWI-1323
+TGGACGGAGAACTGATAAGGGC
++
+BAACB@@@BBBBCCBCC?;>5>
+@HWI-1304
+TGTGATGTGACGTAGTGGAA
++
+A92@?@1@7?:)9?>>708@
+@HWI-1337
+TGGACGGAGAACTGATAAGGGC
++
+B?BCA6A@BBC@CB>BCB;=;A
+@HWI-1344
+TGGAATGTAAAGAAGTATGGAG
++
+A869>@9@475;8
+@HWI-1418
+TGGAATGTAAAGAAGTATGGAGT
++
+B?B@BCBBBC@8@@6=B>//>7A
+@HWI-1434
+TCAGGTACCTGAAGTAGCG
++
+B?6B>180?=)9AA3@###
+@HWI-1332
+TATCACAGCCATTTTGACGAGTT
++
+BCB@BABB=BBBBBA58;8>;@B
+@HWI-1447
+TGGACGGAGAACTGATAAGGGC
++
+BBBB6CBBCABBBCBCCC5A9=
+@HWI-1449
+TGGACGGAGAACTGATAAGGGC
++
+B;>@>=@?7@BA7BBC<6;###
+@HWI-1451
+TCAGGTACCTGAAGTAGCGCGCG
++
+BA@;@7<6:?91=##########
+@HWI-1460
+TGGAATGTAAAGAAGTATGGAG
++
+BCCBCCCBCB?:?A?>BA75;<
+@HWI-1375
+TGACTAGATTCACACTCATTA
++
+BCABCB?ACC>BA39C@5BC<
+@HWI-1481
+TGGAATGTAAAGAAGTATGGAG
++
+B6/9B@5ABBA??>048989<0
+@HWI-1489
+TGTGATGTGACGTAGTGGAA
++
+BAA8ABBBAB;+::1AB78B
+@HWI-1505
+TGGAATGTAAAGAAGTATGGA
++
+B?)=?A97<>;A=9BB###
+@HWI-1508
+TGTGATGTGACGTAGTGGAA
++
+B>>A:=0?::3.960:####
+@HWI-1517
+TGGAATGTAAAGAAGTATGGAG
++
+B>>>BB>ABB=4==1=B>0)<0
+@HWI-1530
+TGGACGGAGAACTGATAAGGGCA
++
+BCBBBCB>BBBA@;?BA<>9;>:=B:A9>@>?3###
+@HWI-1534
+TGGACGGAGAACTGATAAGGGC
++
+B9;BAA>=A@B?A>@BB@><4<
+@HWI-1538
+TCAGGTACCTGAAGTAGCGCGCG
++
+BCCCBBBAA?):@@;;;:39###
+@HWI-1547
+TGGACGGAGAACTGATAAGG
++
+A6;@@A6<>?B
+@HWI-1572
+TGGAATGTAAAGAAGTATGGAG
++
+BAA@BBA@@<>?@A=9?9
+@HWI-1583
+TGGACGGAGAACTGATAAGGGC
++
+B>?B?50??@C@BABBBA66)8
+@HWI-1592
+TGGACGGAGAACTGATAAGGGC
++
+BBB>BBB;?>@@CB@BBAAA>=
+@HWI-1522
+TGGACGGAGAACTGATAAGGGCAA
++
+A51BB?5?BABAA;ACA7=@;>@>
+@HWI-1602
+CGGCACATGTTGAAGTACACTCAA
++
+?@;AB>C@@;=1A;4=?A?BCAC=
+@HWI-1521
+TCAGGTACCTGAAGTAGCGCGCG
++
+BA?8?4>=?@>;:7;98######
+@HWI-1625
+TCAGGTACCTGAAGTAGCGCGCG
++
+BB@A@';9A;*=9?4<7994###
+@HWI-1631
+TGGAATGTAAAGAAGTATGGAG
++
+B>A>BB6@BA=8<<6=BA75>8
+@HWI-1632
+TGTGATGTGACGTAGTGGAA
++
+BA??;B>><=<4=<7?:599
+@HWI-1636
+TGGACGGAGAACTGATAAGGGCA
++
+A5=AAAA
+@HWI-1662
+TGGACGGAGAACTGATAAGGG
++
+A65=A@A7:B@=303
+@HWI-1666
+TGGACGGAGAACTGATAAGGGC
++
+BABCBCBBCACBA2
+@HWI-1668
+TGGAATGTAAAGAAGTATGGA
++
+B6-:@A36>B?A?7B?BBBBB@6>;8
+@HWI-1683
+TGGACGGAGAACTGATAAGGGC
++
+BAABABB?B?@;B>@BBAAA=9
+@HWI-1690
+TGTGATGTGACGTAGTGGAA
++
+A;;>?A0?9>9549)=1589
+@HWI-1702
+AATGGCACTGGAAGAATTCACGGGT
++
+@BA>@;@=:9@;=95;<@;<9'.3<
+@HWI-1717
+AGGACGGGAAGGTGTCAACG
++
+AA?BAB?A@>@:<;=9
+@HWI-1724
+TGGAATGTAAAGAAGTATGGAG
++
+B?<=AB@A@<>B><50;
+@HWI-1728
+TGGAATGTAAAGAAGTATGGAG
++
+@@6@BB;@BC@>>A7;BB=:?)
+@HWI-1729
+CGAATAGCGTTGTGACTGA
++
+@B@CB@BB89ABBA?B8>?
+@HWI-1733
+TGGAATGTAAAGAAGTATGGAG
++
+BAABBBB?BB>0=>8@B=/,;(
+@HWI-1743
+TGTGATGTGACGTAGTGGAA
++
+BB>A9B+><<@>7;0=A><>
+@HWI-1744
+TGGACGGAGAACTGATAAGGGC
++
+B@6?BB;>B?B@BA?B=:6>;A
+@HWI-1754
+TCAGGTACCTGAAGTAGCGCGCG
++
+BBBA;;=1;;)940446-#####
+@HWI-1640
+TGGACGGAGAACTGATAAGGG
++
+B99@AB;@4
+@HWI-1769
+TCAGGTACCTGAAGTAGCGCGCG
++
+BCBA<=BCBC?BB>5A9;6:%:.
+@HWI-1773
+TGTGATGTGACGTAGTGGAAC
++
+BBAA>A>>;;73?:9?:2.34
+@HWI-1596
+TGGACGGAGAACTGATAAGGG
++
+BB@B@AA?A@B?B:>AB@943
+@HWI-1780
+TGGACGGAGAACTGATAAGGGCAA
++
+B;;BA@9?BAB?=-<@>@3:4>;?
+@HWI-1800
+TGGAATGTAAAGAAGTATGGAG
++
+BAB?BBA@B@:)9=AAB@34?0
+@HWI-1806
+TGGAATGTAAAGAAGTATGGA
++
+BBBBCCABCCA=AA<;CB@2>
+@HWI-1821
+TGGAATGTAAAGAAGTATGGAG
++
+B@8?AB;@BB>;>>;;@@:2=7
+@HWI-1766
+TGGACGGAGAACTGATAAGGGC
++
+BCACCCC?:C@7BAAACB6>2<
+@HWI-1830
+TGGACGGAGAACTGATAAGGGC
++
+BA@BB>@>>@B@A5>@B@866;
+@HWI-1846
+TGGACGGAGAACTGATAAGGGC
++
+BB?;5BA?@8B;>>@A9<####
+@HWI-1851
+TGGAATGTAAAGAAGTATGGA
++
+BBB@BBBABB@3??A@AB###
+@HWI-1856
+TGGAATGTAAAGAAGTATGGAG
++
+B@:=@B?BBB=:>@@>BB=:>;
+@HWI-1873
+TGGAATGTAAAGAAGTATGGAG
++
+BB@?BB@BBB???=>=B@2)<=
+@HWI-1876
+TGGACGGAGAACTGATAAGGGC
++
+A==B@?B;?>B=72;B==/<##
+@HWI-1885
+TGGAATGTAAAGAAGTATGGAG
++
+A74@BCBABAA:03BB:3=2
+@HWI-1888
+TGTGATGTGACGTAGTGGAA
++
+B@@:>>1<:62;?=7928=@
+@HWI-1889
+TGGACGGAGAACTGATAAGGGC
++
+A>;@B@;@=@CAB;@BB?330:
+@HWI-1894
+TAAGGAAATAGTAGCCGTGAT
++
+BAA>>########
+@HWI-1912
+TGGACGGAGAACTGATAAGGG
++
+=?67;8B>A4?A=)>1@?:
+@HWI-1914
+TAAGGAAATAGTAGCCGTGAT
++
+BBBB7BB>9B;=AA:A>####
+@HWI-1915
+TAGGAACTTCATACCGTGCTCT
++
+BC=9@BBCCBCBCBA*?3?BAB
+@HWI-1931
+TGGACGGAGAACTGATAAGGGCAA
++
+A6;@B=5@=@BBCB?B@A13)=9@
+@HWI-1940
+TGGAATGTAAAGAAGTATGGAG
++
+BC@BBBCBBAA@;?7757:
+@HWI-1943
+TGTGATGTGACGTAGTGGAA
++
+BA<:7BB>>=?AA<9?>>5;
+@HWI-1955
+TGGAATGTAAAGAAGTATGGAG
++
+B?;>AB;>BA???>49BA92=2
+@HWI-1884
+TGGACGGAGAACTGATAAGGGC
++
+B==A=3<=B>;8877><9?:####
+@HWI-1983
+TGGACGGAGAACTGATAAGGGC
++
+B=1@B@6:B@B@BB=BBBA@;@
+@HWI-1980
+TGGACGGAGAACTGATAAGGGC
++
+BABBB=>>6?C@BBBBA=(52@
+@HWI-1992
+TGTGATGTGACGTAGTGGAA
++
+BA@9=@7@A?<>>?:@7:9>
+@HWI-1995
+CAAATTCGGTTCTAGAGAGGTT
++
+B
+@HWI-2011
+ACTGAATTCTCGTGGGTCTGCAT
++
+BAB==B?B@A@8;/.(:?A>:
+@HWI-2017
+TCAGGTACCTGAAGTAGCGCGCG
++
+BC@?727=B@5<<##########
+@HWI-2013
+AATTGCACTAGTCCCGGCCTG
++
+BCA>5;A=A>?=B@@4:?@8'
+@HWI-2040
+TGGACGGAGAACTGATAAGGGC
++
+BBBBB=BBB9BBB@?BB:88>=
+@HWI-2049
+TGGAATGTAAAGAAGTATGGAG
++
+BA>=ABAABB>5><0;@?.-94
+@HWI-2053
+TATCACAGCCATTTTGACGAGTT
++
+BCBBCBC>A@ABCBC@@=;BCBCBAA;A?A?BAACA=805;
+@HWI-2106
+TGGACGGAGAACTGATAAGGGCT
++
+A:5;BA8;A=A@?4;BA93-0:=
+@HWI-2100
+TCAGGTACCTGAAGTAGCGCGCGTTAT
++
+BB@579A@B<3;=07615830,6>=##
+@HWI-2072
+GGACGGAGAACTGATAAGGGCTCGG
++
+ABCBBA.3@@:A=@B=@@0<;@;.<
+@HWI-2124
+TGGACGGAGAACTGATAAGGGC
++
+?00@B@1>?AB@>9;B?>503?
+@HWI-2136
+TGGACGGAGAACTGATAAGGGC
++
+BBBBBBCABB@>BB?CBB?>>:
+@HWI-2137
+TGTGATGTGACGTAGTGGAA
++
+B>=<:<-149;8>8.93379
+@HWI-2141
+TGTGATGTGACGTAGTGGAAA
++
+BB@AAB<@8>=+=@1=/5=B?
+@HWI-2117
+CGGCACATGTTGAAGTACACTCA
++
+B>ABBABA>@?)=?0>CABABBB
+@HWI-2146
+TGGACGGAGAACTGATAAGGGC
++
+B;;BB4=ABBCBB8@CC;/5;B
+@HWI-2148
+TGGACGGAGAACTGATAAGGGC
++
+BBABA??@@AA:B@@AB@6=8=
+@HWI-2151
+TGGACGGAGAACTGATAAGGGC
++
+BCA@>BCB?3ABBA?B?A7@6B
+@HWI-2145
+TGTGATGTGACGTAGTGGAA
++
+B>?BBB>A>A;9<@1@559<
+@HWI-2158
+AATGGCACTGGAAGAATTCACGGG
++
+BCBABBBB?50=?8;A=>9>;###
+@HWI-2142
+TGGACGGAGAACTGATAAGGGC
++
+A4;@<@?=<@>@BB>BB8<0:7
+@HWI-2140
+TCAGGTACCTGAAGTAGCGCGCG
++
+BBABB?B=>=0<@>7<394:4:4
+@HWI-2191
+AATGGCACTGGAAGAATTCACGTG
++
+ABB?A@ABA;5;>8?#######
+@HWI-2183
+TGGACGGAGAACTGATAAGGGC
++
+BAA?@>@>:;>=A>>BA:3107
+@HWI-2203
+TGGAATGTAAAGGAGTATGGAG
++
+BBABBC@?@A83?A>BBB>7?:
+@HWI-2200
+TCTTTGGTATTCTAGCTGTAGA
++
+BBCBCC@BACC?CB:AB5@###
+@HWI-2209
+TGGACGGAGAACTGATAAGGGC
++
+BAAA?A>>=;@;B;>@@>5/94
+@HWI-2218
+TGGAATGTAAAGAAGTATGGAG
++
+BC>BBCBBBC?A?@<>BB2);7
+@HWI-2222
+TGGACGGAGAACTGATAAGGGCA
++
+BCABBB?@BBCBBAABB@94;@@
+@HWI-2227
+TGGACGGAGAACTGATAAGGGC
++
+BBBBBBB?B@BAA;>BBA:0;>
+@HWI-2225
+AATGGCACTGGAAGAATTCACGG
++
+ABBA@?B@:6>;?@9@@B=?::?
+@HWI-2230
+TGGACGGAGAACTGATAAGGGC
++
+BBBB@CB@?>B?A;>=@>2/7;
diff -r 000000000000 -r a8aacccd79a3 test-data/input.sorted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.sorted.fa Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,500 @@
+>1
+>10
+>100
+>101
+>102
+>103
+>104
+>105
+>106
+>107
+>108
+>109
+>11
+>110
+>111
+>112
+>113
+>114
+>115
+>116
+>117
+>118
+>119
+>12
+>120
+>121
+>122
+>123
+>124
+>125
+>126
+>127
+>128
+>129
+>13
+>130
+>131
+>132
+>133
+>134
+>135
+>136
+>137
+>138
+>139
+>14
+>140
+>141
+>142
+>143
+>144
+>145
+>146
+>147
+>148
+>149
+>15
+>150
+>151
+>152
+>153
+>154
+>155
+>156
+>157
+>158
+>159
+>16
+>160
+>161
+>162
+>163
+>164
+>165
+>166
+>167
+>168
+>169
+>17
+>170
+>171
+>172
+>173
+>174
+>175
+>176
+>177
+>178
+>179
+>18
+>180
+>181
+>182
+>183
+>184
+>185
+>186
+>187
+>188
+>189
+>19
+>190
+>191
+>192
+>193
+>194
+>195
+>196
+>197
+>198
+>199
+>2
+>20
+>200
+>201
+>202
+>203
+>204
+>205
+>206
+>207
+>208
+>209
+>21
+>210
+>211
+>212
+>213
+>214
+>215
+>216
+>217
+>218
+>219
+>22
+>220
+>221
+>222
+>223
+>224
+>225
+>226
+>227
+>228
+>229
+>23
+>230
+>231
+>232
+>233
+>234
+>235
+>236
+>237
+>238
+>239
+>24
+>240
+>241
+>242
+>243
+>244
+>245
+>246
+>247
+>248
+>249
+>25
+>250
+>26
+>27
+>28
+>29
+>3
+>30
+>31
+>32
+>33
+>34
+>35
+>36
+>37
+>38
+>39
+>4
+>40
+>41
+>42
+>43
+>44
+>45
+>46
+>47
+>48
+>49
+>5
+>50
+>51
+>52
+>53
+>54
+>55
+>56
+>57
+>58
+>59
+>6
+>60
+>61
+>62
+>63
+>64
+>65
+>66
+>67
+>68
+>69
+>7
+>70
+>71
+>72
+>73
+>74
+>75
+>76
+>77
+>78
+>79
+>8
+>80
+>81
+>82
+>83
+>84
+>85
+>86
+>87
+>88
+>89
+>9
+>90
+>91
+>92
+>93
+>94
+>95
+>96
+>97
+>98
+>99
+AAAAAATATCTTTTTTAACTCGTGGCC
+AAAAACACACAGACACAAGCAGCAAT
+AAAACCCGGACAAACCATCGGAGGA
+AAAAGGAAAAACAGAAAAATTGGG
+AAAATCGACTGCCGAAAACATTTTAA
+AAAGACAAAAGAAATACAGGCACT
+AAATTGCAAAGATGGAAAATAAAACT
+AACAACTGCAAACATCTACCACA
+AACAGGAAAAACAGAAGGATTTCTA
+AACAGGGAGATCAACAGCGTTGACA
+AACATTTTATCAATTATACATTA
+AACGGGGAATAAGGGTTCG
+AACTCTCTAATTTAACTTTGTGC
+AACTTTAAATTTTTAATAACCTT
+AAGACAACAATGACATATAAGACG
+AAGATGGAGTAGTTTTTT
+AAGCACGCCTTACCACAATTTATAA
+AAGGAATTAAAGCAATAATTCTAA
+AAGTGAAGAAGTAGTTTTT
+AATATAAAAATACAATCAACCATTGCA
+AATCTACAATTTCCATTACGACTCC
+AATGACACACTCTTCATCAAC
+AATGTCACTTGAAGAATTCACGT
+AATTGCAACAGAGACTGGAA
+AATTTATTTAATTTATATTCTAACTAA
+ACAAATCATAAATTTTTTTTTACT
+ACAACCTCAACTCATATTT
+ACAGCAGGACGGTGATCA
+ACAGCAGGACGGTGATCA
+ACCAGCACCTTCCGACTCAACGTCAAA
+ACTAAACTTTTCTTACCATATTTCTA
+AGAACAATTAAATAAAATAGCATA
+AGATGTTGATCTAAACTCTCCCA
+AGGAATATGATGAAATAAAAAAAT
+AGGATTTTTAAGCCCATATGTTTCC
+ATAAAGCTAGATTACCAAAGCAT
+ATGAAATTCGAACAATACGTC
+ATGTTATTTACTTTTTCCCCTTATA
+ATTATTAATAAATTATTATAA
+ATTTTACTTCATCATTTTC
+CAAACGGAACAAGACATCACCATC
+CAAATAACAAACTGAATAAACGAAA
+CAACACATGACGCGACAATTCTTG
+CAAGAATACAAAAAATACTAATTA
+CAAGATATGAACAAAGCAAAGACAC
+CAAGGCTCAGAAGAACATCACCAAGACC
+CAATTTTTAATTCCTTTTTTCTTCTT
+CACAGACTGAGGCAGAAAAAACAA
+CACCGAACCGGGAAGGCGAACAAC
+CACGACTTTATTCTTTTTATCTCA
+CAGATTCACTGATTTTCTTACGCC
+CAGTTTCACAAAAGATCTTTTAA
+CATAATATAAACTTATCTT
+CATACCTACAAAAAAGCTTCTCTTAC
+CATACCTTAAATTATCTCTTTCTT
+CATTAATTCATCCATTTAAACTAA
+CCAGAAAACAATACAACATCCTCA
+CCATATATGACTGACTCATTTCAC
+CCCGAAAAGCCGAGGACGACTTA
+CCGACCGAGCAAATAAACACAGGAACG
+CCGCGATCTGCTTATTTATAATCTT
+CCTTACTCAACATACTTAATCATACTTA
+CGATATTTTCTCCTCGTACC
+CGATATTTTCTCCTCTGACC
+CGATATTTTCTCCTCTTACC
+CGATATTTTCTCCTCTTACCT
+CGCAACCAGCAGCAACTCCTAGCAT
+CGCCGCAAGATGAATACTCTAATGA
+CGGAAAAGAATGTAGACCATTTAA
+CGGACGGTATATTTTTTAATATAA
+CGGCACATGTTGAATTACACTCA
+CTAGACAAGATGCTATAAATTTTAAA
+CTATATTTTCTCTCTTACC
+CTGGAAACTATTGATCAAATT
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+GAAACAAACAACACATACCCTCTGGC
+GAAACCATTATCTTATCTTTATACA
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+GAACAATTTTTCAATTTTTTACATTA
+GAAGAGGAGGAGGAGTTTGTAAG
+GAATGATCGCACCACCACCTCAACGTT
+GACGAAACGCAACAACAAAATGGACG
+GAGAACTTTTAATCATTTTAC
+GAGAATAAATATTTCAATGGTCTATTG
+GAGGATTAAAAGAACGGTTTATAA
+GCAGATAGAAATCAATACAAAAATC
+GCCAACGACCATACCACGA
+GCCGGGGCGTGAGATGTCTGCATTA
+GGACGGAGAACTGATAAGGGCA
+GGACGGAGAACTGATAATGGC
+GGAGATTGTAGAACGAAAGGAAAAT
+GGTATCTTTATATTTTAATTTTCTT
+NATTCTTACTCCATTTCAATTTACT
+TAAAAATAATTGTCTTTAATTTCA
+TAAAATAAACCAAAACCCAAAAAT
+TAAAATAAATAAGTCCGACGACAA
+TAAAATTGTAATATTTAAATAATAT
+TAAAGAAGAAGAATTGATTTTAAT
+TAACAAATAGAACGTTCTAATTTAAA
+TAACATAAATTTTAATCATAAATTG
+TAACCTTGCAGAACTATACGATTCAAA
+TAACGGAGCACGAGAACGAAGTGG
+TAACTACTTTTACATTAATACTAA
+TAACTTTAACTTTTTTACT
+TAAGAAACTGAGCTAACGCAATGTACC
+TAAGTTTTTAATCATTTTTTTT
+TAATAAATTATTAAATAAAAAAAAAA
+TAATAAGAAACTGTTCAAACAATCCAC
+TAATAATTTAAATAAATATAAATTT
+TAATCAAAAAACTCTTCATTTTTA
+TAATTACCATTGCTAACTATCCA
+TACAAAAAATGCGAAAATTGACCCT
+TACAAAACAAACAAATTACAATCTAAA
+TACAAACGGAACTTTCGTCATAA
+TACAAACGGAACTTTCTTCATAACTTC
+TACAAACGTAATTTTCGCATAACATC
+TACAAATTTTTTTTTCTTTCTTAT
+TACAACTAACATCCTTTCTTCTTCC
+TACAAGACTAAAACAAACGTGAAGT
+TACACACTCATCAACCAAAGGACG
+TACACACTCATCAACCAAAGTACGTA
+TACACAGACTTACAAAACACATCCTTC
+TACACCTCTTTTTACTTTTTTATT
+TACAGACAACACATACGGACTTAA
+TACAGACAACACATACGGACTTAAGT
+TACAGAGAAATATACAACACTCACC
+TACCTCTTTATTAACCTCCACCTCTA
+TACTATATACTTCTTCAAATCACA
+TACTATTTTATTATACATACATACATTA
+TACTCAATAGAACTCTACTCACTCATA
+TACTGAAAACGGGCGCATATCAGTGG
+TACTGAAACAAGGAAACACAAGC
+TACTTTTTTCTTAATTTTTTATTAAAC
+TAGAACTCGAACCAGAGCTCC
+TAGACTTTCTACTCATTATTAC
+TAGCCTTTACTAGGCTTTTTCTAA
+TAGCGAGATGGACCAACGTGCTGT
+TAGGAACTTCATACCGGTCTC
+TAGGTACTTACCTTTTTTTTACACAA
+TAGTCATACATACCTAATTATACATA
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+TAGTTACCTTCATATCTCTCTTTA
+TAGTTTCACTACTTTATTCTTTTTA
+TAGTTTTAAATATTTCTTTTTTTC
+TATAATTTTATTTTATATTTTCTCT
+TATATAAATCTTCAACATCAA
+TATATTGCCTCCCCATAATCCTT
+TATCTATCTTTGATCTTCTTTTCA
+TATCTGATCAACAATCTTTTCCCAT
+TATGGAGAAACAGCGATATAAGTCA
+TATGGATTATTTCAAAATTTTTTTTT
+TATTATACATAGAATAACAAATCTTT
+TATTATCATCTCGTTCTTCCTTCTC
+TATTATTTTTTTATTCCATTCATAT
+TATTCAATCACTCCATTATATATAACA
+TATTCCGACAATACCTTCTTTAC
+TATTTATAAATTTTTTCTTGAGAC
+TATTTCACTTTATACTTCCTTAA
+TATTTCAGCAACAGACTAAGACTAA
+TATTTCCAACCTTCAACCTCAAATAA
+TATTTCTTTTTTAACTTCTTTTC
+TCAAAAAGTAATAGGGATCGTTA
+TCAAAGAACAATGTAAAGCCGCGAC
+TCAAATGCAAATTGGATTTATGA
+TCAACTGGCAAGAATTTTTGAAAATT
+TCAAGCCTTTTGAAGAACTGACCTAAA
+TCAGAAGAACAGAGAATTGATTTT
+TCAGAGTTCTACAGGTCCTACGATT
+TCAGTCTTTTTTTCTCTCCTA
+TCATTACACTTCTTACAAAAC
+TCCACAACAACTCTATCTAAAGCATT
+TCCACCTATTTATCTTTTCTT
+TCCCAACCCTCGAGCATCATTTTC
+TCCGAAAAATCGTAGGACCCGGGCA
+TCCGAAAACAAGGCCCGTCGCT
+TCCTGAGGACGAGGGGCGTTTAGC
+TCGCCGTAAAGCCAGTCGTTCTCC
+TCTAAACACCCACGAAAATCTCTTAC
+TCTAGTCTGAGCGTAGTACCAGATTG
+TCTATATTATTTTTATCAATTTTCACC
+TCTATTCATACAAAACACTAATACCC
+TCTATTTCTTTATTTTTTTTATTAT
+TCTCATCTTACAATTTTTTAAAACTT
+TCTCTTTTATTTTTATCTTTCCTT
+TCTGTATTTGACTTATTACTTTCTCC
+TCTTATTATCATTTTTTTATCCCTT
+TCTTATTTTAATCTTCCAATTTC
+TCTTCTATATAATCCTTTATTATAA
+TCTTTTTTTTAATACTTATTTTCATT
+TGAAACTGAAACTAACATACAAAATATT
+TGAAAGGAAAAACAGGACACGGGA
+TGACCAAAGACAAACAAACAATAAATA
+TGAGAATGACTTCTTCACGATCTCTT
+TGAGCGGAGAACCAGAGTTGATGAGC
+TGAGCTAGAACTGCACCCACTCCA
+TGAGGAAAACAGAAAAATGAGAGACA
+TGATGACGGGCAGCAGGGATTTTC
+TGATTTACTTACATTCTTTTTTT
+TGCTTGGACTACATATGGTTGAGGG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTTTACTACATATTTTTTATTTTTTTA
+TGGAATGTAAAGAAGTATGGAG
+TGGAATGTAAAGAAGTATGGAG
+TGGACAAGAACCACGCGACGGGTGT
+TGGACGGAGAACTGATAAGGGC
+TGGAGTAGCACAGTCGTCTGAAATC
+TGTGTAATCTTTCTACTTCTTCTAC
+TTAAACAATTTGGAATTAATT
+TTAAAGACGCAACAACTAACATT
+TTAAGTTTTAGACATAATCTATTACAA
+TTAATATGTAATTTCATACCTCAC
+TTAATGACACACGGGAAAAACACCG
+TTACAATCTACTATTCTTTTATTA
+TTACTAGATCCACCCTCATTA
+TTAGTATTAATCTTCACTTAA
+TTATAATCACGGCACCCTATACA
+TTATTACATCGTCCACATATAACAAAA
+TTATTATCTATTTTAATTTTTCTTAA
+TTCAAGTAGATTGCATTTTTTAATA
+TTCATTTTATTTTTAAATATCTTTTTT
+TTCTAGCACAACACGCACACATATA
+TTCTTCCTTTTATCCTCTCTTAA
+TTCTTGATAACGCATCTTCTACAT
+TTCTTGGACTACACATTTTTTATTGTTTTA
+TTCTTTGACTACATATTTTTTATT
+TTCTTTTACTACATATTGTTTATTTTTTTA
+TTCTTTTACTACATATTTTTTATTTTT
+TTCTTTTACTACATATTTTTTATTTTTTTA
+TTGATTCTTCTTTTTCACAAAA
+TTGTAAAACATTCTTTCTCCTGAC
+TTTAAACACTTCCTACATCAAATTTC
+TTTAAGACTTATGAGCTTG
+TTTACCAGAGGAGTCGAGTTTTT
+TTTATTACAACCCTATCTTACCTCAA
+TTTATTACTTAGTCATAATTCCAA
+TTTCCGACAAATACACCATCTTC
+TTTGATACCTTTATACCATACCTATT
+TTTGTTTTTCATTTTTTTATCTTT
+TTTGTTTTTTACTATATTT
+TTTTATATTTCCTTATATCTTTACTA
+TTTTCATTTCTTCTTCAAATCCTTT
+TTTTCTTTACCCATCTTTACTTTCCC
+TTTTCTTTTTTTCATTTTCTCTTTTA
+TTTTTAACTCATTTTACAATTAAAC
+TTTTTAACTCCCATCATTTTTCCTC
+TTTTTATCAATTTTCACCATTC
+TTTTTTATCAATTTTCACCATTCAT
+TTTTTTATCATTTTTCACCTAAAAAA
+TTTTTTTTTTGTTTTTATTTTTATCAT
diff -r 000000000000 -r a8aacccd79a3 test-data/output.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.faw Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,494 @@
+>1_2
+ACAGCAGGACGGTGATCA
+>2_2
+TGCTTGGACTACATATGGTTGAGTG
+>3_2
+TGGAATGTAAAGAAGTATGGAG
+>4_1
+AATTTATTTAATTTATATTCTAACTAA
+>5_1
+TTATAATCACGGCACCCTATACA
+>6_1
+TCCGAAAAATCGTAGGACCCGGGCA
+>7_1
+TCCCAACCCTCGAGCATCATTTTC
+>8_1
+TTGATTCTTCTTTTTCACAAAA
+>9_1
+AGAACAATTAAATAAAATAGCATA
+>10_1
+CCAGAAAACAATACAACATCCTCA
+>11_1
+TCTAGTCTGAGCGTAGTACCAGATTG
+>12_1
+TTTTTAACTCCCATCATTTTTCCTC
+>13_1
+GCAGATAGAAATCAATACAAAAATC
+>14_1
+AAGTGAAGAAGTAGTTTTT
+>15_1
+AATGTCACTTGAAGAATTCACGT
+>16_1
+TTTACCAGAGGAGTCGAGTTTTT
+>17_1
+GGTATCTTTATATTTTAATTTTCTT
+>18_1
+TTACTAGATCCACCCTCATTA
+>19_1
+TATATAAATCTTCAACATCAA
+>20_1
+GAAACCATTATCTTATCTTTATACA
+>21_1
+GGAGATTGTAGAACGAAAGGAAAAT
+>22_1
+ACTAAACTTTTCTTACCATATTTCTA
+>23_1
+TCAAGCCTTTTGAAGAACTGACCTAAA
+>24_1
+TAACATAAATTTTAATCATAAATTG
+>25_1
+TAGCGAGATGGACCAACGTGCTGT
+>26_1
+TACAAACGTAATTTTCGCATAACATC
+>27_1
+CGGAAAAGAATGTAGACCATTTAA
+>28_1
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+>29_1
+CAAGAATACAAAAAATACTAATTA
+>30_1
+CATACCTACAAAAAAGCTTCTCTTAC
+>31_1
+TAAAATAAATAAGTCCGACGACAA
+>32_1
+AGGAATATGATGAAATAAAAAAAT
+>33_1
+TTTTTTTTTTGTTTTTATTTTTATCAT
+>34_1
+TTTTTAACTCATTTTACAATTAAAC
+>35_1
+CCGCGATCTGCTTATTTATAATCTT
+>36_1
+TAGGTACTTACCTTTTTTTTACACAA
+>37_1
+TCATTACACTTCTTACAAAAC
+>38_1
+TATTATACATAGAATAACAAATCTTT
+>39_1
+ATGTTATTTACTTTTTCCCCTTATA
+>40_1
+TCCGAAAACAAGGCCCGTCGCT
+>41_1
+TGCTTTTACTACATATTTTTTATTTTTTTA
+>42_1
+GAACAATTTTTCAATTTTTTACATTA
+>43_1
+AACATTTTATCAATTATACATTA
+>44_1
+TATCTGATCAACAATCTTTTCCCAT
+>45_1
+AATTGCAACAGAGACTGGAA
+>46_1
+TATTCAATCACTCCATTATATATAACA
+>47_1
+CAAACGGAACAAGACATCACCATC
+>48_1
+TCTTCTATATAATCCTTTATTATAA
+>49_1
+TGATGACGGGCAGCAGGGATTTTC
+>50_1
+TTTATTACAACCCTATCTTACCTCAA
+>51_1
+AACAGGAAAAACAGAAGGATTTCTA
+>52_1
+CGATATTTTCTCCTCGTACC
+>53_1
+AAATTGCAAAGATGGAAAATAAAACT
+>54_1
+TACACACTCATCAACCAAAGGACG
+>55_1
+TATATTGCCTCCCCATAATCCTT
+>56_1
+TACAAACGGAACTTTCTTCATAACTTC
+>57_1
+GGACGGAGAACTGATAATGGC
+>58_1
+TTCTTTGACTACATATTTTTTATT
+>59_1
+TCTTTTTTTTAATACTTATTTTCATT
+>60_1
+TACTTTTTTCTTAATTTTTTATTAAAC
+>61_1
+AATATAAAAATACAATCAACCATTGCA
+>62_1
+CAGTTTCACAAAAGATCTTTTAA
+>63_1
+GAAACAAACAACACATACCCTCTGGC
+>64_1
+AATGACACACTCTTCATCAAC
+>65_1
+AAGATGGAGTAGTTTTTT
+>66_1
+ACAACCTCAACTCATATTT
+>67_1
+TTAAACAATTTGGAATTAATT
+>68_1
+TACAAAAAATGCGAAAATTGACCCT
+>69_1
+GGACGGAGAACTGATAAGGGCA
+>70_1
+TTAAGTTTTAGACATAATCTATTACAA
+>71_1
+TTATTATCTATTTTAATTTTTCTTAA
+>72_1
+CACCGAACCGGGAAGGCGAACAAC
+>73_1
+TCCACCTATTTATCTTTTCTT
+>74_1
+TCGCCGTAAAGCCAGTCGTTCTCC
+>75_1
+TCAAAGAACAATGTAAAGCCGCGAC
+>76_1
+TGGACAAGAACCACGCGACGGGTGT
+>77_1
+CCCGAAAAGCCGAGGACGACTTA
+>78_1
+TTTTTTATCATTTTTCACCTAAAAAA
+>79_1
+TAGAACTCGAACCAGAGCTCC
+>80_1
+TCTATATTATTTTTATCAATTTTCACC
+>81_1
+TCTATTTCTTTATTTTTTTTATTAT
+>82_1
+TTTGATACCTTTATACCATACCTATT
+>83_1
+ATAAAGCTAGATTACCAAAGCAT
+>84_1
+GCCAACGACCATACCACGA
+>85_1
+CGGCACATGTTGAATTACACTCA
+>86_1
+TACTATTTTATTATACATACATACATTA
+>87_1
+TTAATGACACACGGGAAAAACACCG
+>88_1
+TAGTTTCACTACTTTATTCTTTTTA
+>89_1
+AACAGGGAGATCAACAGCGTTGACA
+>90_1
+CGATATTTTCTCCTCTGACC
+>91_1
+ACCAGCACCTTCCGACTCAACGTCAAA
+>92_1
+AAGGAATTAAAGCAATAATTCTAA
+>93_1
+TACAAAACAAACAAATTACAATCTAAA
+>94_1
+CAATTTTTAATTCCTTTTTTCTTCTT
+>95_1
+TACAGACAACACATACGGACTTAA
+>96_1
+TCTGTATTTGACTTATTACTTTCTCC
+>97_1
+TGAGCTAGAACTGCACCCACTCCA
+>98_1
+CGCCGCAAGATGAATACTCTAATGA
+>99_1
+TATTTCTTTTTTAACTTCTTTTC
+>100_1
+TTACAATCTACTATTCTTTTATTA
+>101_1
+TTTAAACACTTCCTACATCAAATTTC
+>102_1
+TGTGTAATCTTTCTACTTCTTCTAC
+>103_1
+TCTATTCATACAAAACACTAATACCC
+>104_1
+TGGAGTAGCACAGTCGTCTGAAATC
+>105_1
+AAGCACGCCTTACCACAATTTATAA
+>106_1
+CTGGAAACTATTGATCAAATT
+>107_1
+TACACAGACTTACAAAACACATCCTTC
+>108_1
+TTCAAGTAGATTGCATTTTTTAATA
+>109_1
+TTATTACATCGTCCACATATAACAAAA
+>110_1
+CAAGGCTCAGAAGAACATCACCAAGACC
+>111_1
+TGAGGAAAACAGAAAAATGAGAGACA
+>112_1
+TCAAAAAGTAATAGGGATCGTTA
+>113_1
+TAACTTTAACTTTTTTACT
+>114_1
+TATTCCGACAATACCTTCTTTAC
+>115_1
+TTTGTTTTTTACTATATTT
+>116_1
+TTCATTTTATTTTTAAATATCTTTTTT
+>117_1
+TACTCAATAGAACTCTACTCACTCATA
+>118_1
+TGAAAGGAAAAACAGGACACGGGA
+>119_1
+AAAATCGACTGCCGAAAACATTTTAA
+>120_1
+TACAGAGAAATATACAACACTCACC
+>121_1
+TCAACTGGCAAGAATTTTTGAAAATT
+>122_1
+GAGAACTTTTAATCATTTTAC
+>123_1
+TATTATCATCTCGTTCTTCCTTCTC
+>124_1
+TTTTCATTTCTTCTTCAAATCCTTT
+>125_1
+TAGTCATACATACCTAATTATACATA
+>126_1
+ATTTTACTTCATCATTTTC
+>127_1
+TCTCTTTTATTTTTATCTTTCCTT
+>128_1
+GCCGGGGCGTGAGATGTCTGCATTA
+>129_1
+AGGATTTTTAAGCCCATATGTTTCC
+>130_1
+CAAGATATGAACAAAGCAAAGACAC
+>131_1
+CAACACATGACGCGACAATTCTTG
+>132_1
+CAAATAACAAACTGAATAAACGAAA
+>133_1
+TGAGAATGACTTCTTCACGATCTCTT
+>134_1
+TCTTATTATCATTTTTTTATCCCTT
+>135_1
+TCAAATGCAAATTGGATTTATGA
+>136_1
+CCTTACTCAACATACTTAATCATACTTA
+>137_1
+TAGACTTTCTACTCATTATTAC
+>138_1
+TGAAACTGAAACTAACATACAAAATATT
+>139_1
+AAAACCCGGACAAACCATCGGAGGA
+>140_1
+TACAGACAACACATACGGACTTAAGT
+>141_1
+TATTTCAGCAACAGACTAAGACTAA
+>142_1
+AACTTTAAATTTTTAATAACCTT
+>143_1
+TATTTATAAATTTTTTCTTGAGAC
+>144_1
+TTAATATGTAATTTCATACCTCAC
+>145_1
+CACAGACTGAGGCAGAAAAAACAA
+>146_1
+TAAAGAAGAAGAATTGATTTTAAT
+>147_1
+TACTGAAAACGGGCGCATATCAGTGG
+>148_1
+TCAGTCTTTTTTTCTCTCCTA
+>149_1
+TATAATTTTATTTTATATTTTCTCT
+>150_1
+NATTCTTACTCCATTTCAATTTACT
+>151_1
+TTGTAAAACATTCTTTCTCCTGAC
+>152_1
+TAATTACCATTGCTAACTATCCA
+>153_1
+TTCTTCCTTTTATCCTCTCTTAA
+>154_1
+TCTAAACACCCACGAAAATCTCTTAC
+>155_1
+AAAAACACACAGACACAAGCAGCAAT
+>156_1
+CGGACGGTATATTTTTTAATATAA
+>157_1
+TATGGAGAAACAGCGATATAAGTCA
+>158_1
+TACAACTAACATCCTTTCTTCTTCC
+>159_1
+AACTCTCTAATTTAACTTTGTGC
+>160_1
+TCCTGAGGACGAGGGGCGTTTAGC
+>161_1
+TATTTCCAACCTTCAACCTCAAATAA
+>162_1
+TGGACGGAGAACTGATAAGGGC
+>163_1
+TTTAAGACTTATGAGCTTG
+>164_1
+TTAAAGACGCAACAACTAACATT
+>165_1
+TAGGAACTTCATACCGGTCTC
+>166_1
+CGATATTTTCTCCTCTTACC
+>167_1
+GAGGATTAAAAGAACGGTTTATAA
+>168_1
+GAATGATCGCACCACCACCTCAACGTT
+>169_1
+TTTTCTTTACCCATCTTTACTTTCCC
+>170_1
+AAGACAACAATGACATATAAGACG
+>171_1
+TAATAATTTAAATAAATATAAATTT
+>172_1
+TACTGAAACAAGGAAACACAAGC
+>173_1
+TCAGAAGAACAGAGAATTGATTTT
+>174_1
+CATACCTTAAATTATCTCTTTCTT
+>175_1
+TTCTTTTACTACATATTTTTTATTTTT
+>176_1
+AAAAAATATCTTTTTTAACTCGTGGCC
+>177_1
+TAACAAATAGAACGTTCTAATTTAAA
+>178_1
+TAGTTACCTTCATATCTCTCTTTA
+>179_1
+TAAAATTGTAATATTTAAATAATAT
+>180_1
+AAAAGGAAAAACAGAAAAATTGGG
+>181_1
+AGATGTTGATCTAAACTCTCCCA
+>182_1
+TACCTCTTTATTAACCTCCACCTCTA
+>183_1
+TTTCCGACAAATACACCATCTTC
+>184_1
+ACAAATCATAAATTTTTTTTTACT
+>185_1
+GACGAAACGCAACAACAAAATGGACG
+>186_1
+TACAAATTTTTTTTTCTTTCTTAT
+>187_1
+TACACCTCTTTTTACTTTTTTATT
+>188_1
+TATGGATTATTTCAAAATTTTTTTTT
+>189_1
+TTCTAGCACAACACGCACACATATA
+>190_1
+TAACTACTTTTACATTAATACTAA
+>191_1
+TCTCATCTTACAATTTTTTAAAACTT
+>192_1
+TTCTTGGACTACACATTTTTTATTGTTTTA
+>193_1
+TACACACTCATCAACCAAAGTACGTA
+>194_1
+TACTATATACTTCTTCAAATCACA
+>195_1
+TCAGAGTTCTACAGGTCCTACGATT
+>196_1
+TGATTTACTTACATTCTTTTTTT
+>197_1
+CCATATATGACTGACTCATTTCAC
+>198_1
+GAAGAGGAGGAGGAGTTTGTAAG
+>199_1
+AAAGACAAAAGAAATACAGGCACT
+>200_1
+TACAAGACTAAAACAAACGTGAAGT
+>201_1
+TAACGGAGCACGAGAACGAAGTGG
+>202_1
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+>203_1
+TAATAAGAAACTGTTCAAACAATCCAC
+>204_1
+TGAGCGGAGAACCAGAGTTGATGAGC
+>205_1
+TATTATTTTTTTATTCCATTCATAT
+>206_1
+TTTATTACTTAGTCATAATTCCAA
+>207_1
+TTTTATATTTCCTTATATCTTTACTA
+>208_1
+AACGGGGAATAAGGGTTCG
+>209_1
+AATCTACAATTTCCATTACGACTCC
+>210_1
+CCGACCGAGCAAATAAACACAGGAACG
+>211_1
+TCCACAACAACTCTATCTAAAGCATT
+>212_1
+TTCTTGATAACGCATCTTCTACAT
+>213_1
+TGCTTGGACTACATATGGTTGAGGG
+>214_1
+CAGATTCACTGATTTTCTTACGCC
+>215_1
+TTTGTTTTTCATTTTTTTATCTTT
+>216_1
+CTATATTTTCTCTCTTACC
+>217_1
+TAACCTTGCAGAACTATACGATTCAAA
+>218_1
+TAAGAAACTGAGCTAACGCAATGTACC
+>219_1
+TTCTTTTACTACATATTTTTTATTTTTTTA
+>220_1
+TATCTATCTTTGATCTTCTTTTCA
+>221_1
+TAATAAATTATTAAATAAAAAAAAAA
+>222_1
+TTTTTTATCAATTTTCACCATTCAT
+>223_1
+TATTTCACTTTATACTTCCTTAA
+>224_1
+TAGTTTTAAATATTTCTTTTTTTC
+>225_1
+TTCTTTTACTACATATTGTTTATTTTTTTA
+>226_1
+GAGAATAAATATTTCAATGGTCTATTG
+>227_1
+CGATATTTTCTCCTCTTACCT
+>228_1
+CACGACTTTATTCTTTTTATCTCA
+>229_1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+>230_1
+CATAATATAAACTTATCTT
+>231_1
+ATGAAATTCGAACAATACGTC
+>232_1
+AACAACTGCAAACATCTACCACA
+>233_1
+TAAAAATAATTGTCTTTAATTTCA
+>234_1
+CGCAACCAGCAGCAACTCCTAGCAT
+>235_1
+ATTATTAATAAATTATTATAA
+>236_1
+CATTAATTCATCCATTTAAACTAA
+>237_1
+TCTTATTTTAATCTTCCAATTTC
+>238_1
+CTAGACAAGATGCTATAAATTTTAAA
+>239_1
+TGACCAAAGACAAACAAACAATAAATA
+>240_1
+TTTTTATCAATTTTCACCATTC
+>241_1
+TAAGTTTTTAATCATTTTTTTT
+>242_1
+TAATCAAAAAACTCTTCATTTTTA
+>243_1
+TACAAACGGAACTTTCGTCATAA
+>244_1
+TTTTCTTTTTTTCATTTTCTCTTTTA
+>245_1
+TAGCCTTTACTAGGCTTTTTCTAA
+>246_1
+TTAGTATTAATCTTCACTTAA
+>247_1
+TAAAATAAACCAAAACCCAAAAAT
diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.faw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.faw Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,494 @@
+>100_1
+>101_1
+>102_1
+>103_1
+>104_1
+>105_1
+>106_1
+>107_1
+>108_1
+>109_1
+>10_1
+>110_1
+>111_1
+>112_1
+>113_1
+>114_1
+>115_1
+>116_1
+>117_1
+>118_1
+>119_1
+>11_1
+>120_1
+>121_1
+>122_1
+>123_1
+>124_1
+>125_1
+>126_1
+>127_1
+>128_1
+>129_1
+>12_1
+>130_1
+>131_1
+>132_1
+>133_1
+>134_1
+>135_1
+>136_1
+>137_1
+>138_1
+>139_1
+>13_1
+>140_1
+>141_1
+>142_1
+>143_1
+>144_1
+>145_1
+>146_1
+>147_1
+>148_1
+>149_1
+>14_1
+>150_1
+>151_1
+>152_1
+>153_1
+>154_1
+>155_1
+>156_1
+>157_1
+>158_1
+>159_1
+>15_1
+>160_1
+>161_1
+>162_1
+>163_1
+>164_1
+>165_1
+>166_1
+>167_1
+>168_1
+>169_1
+>16_1
+>170_1
+>171_1
+>172_1
+>173_1
+>174_1
+>175_1
+>176_1
+>177_1
+>178_1
+>179_1
+>17_1
+>180_1
+>181_1
+>182_1
+>183_1
+>184_1
+>185_1
+>186_1
+>187_1
+>188_1
+>189_1
+>18_1
+>190_1
+>191_1
+>192_1
+>193_1
+>194_1
+>195_1
+>196_1
+>197_1
+>198_1
+>199_1
+>19_1
+>1_2
+>200_1
+>201_1
+>202_1
+>203_1
+>204_1
+>205_1
+>206_1
+>207_1
+>208_1
+>209_1
+>20_1
+>210_1
+>211_1
+>212_1
+>213_1
+>214_1
+>215_1
+>216_1
+>217_1
+>218_1
+>219_1
+>21_1
+>220_1
+>221_1
+>222_1
+>223_1
+>224_1
+>225_1
+>226_1
+>227_1
+>228_1
+>229_1
+>22_1
+>230_1
+>231_1
+>232_1
+>233_1
+>234_1
+>235_1
+>236_1
+>237_1
+>238_1
+>239_1
+>23_1
+>240_1
+>241_1
+>242_1
+>243_1
+>244_1
+>245_1
+>246_1
+>247_1
+>24_1
+>25_1
+>26_1
+>27_1
+>28_1
+>29_1
+>2_2
+>30_1
+>31_1
+>32_1
+>33_1
+>34_1
+>35_1
+>36_1
+>37_1
+>38_1
+>39_1
+>3_2
+>40_1
+>41_1
+>42_1
+>43_1
+>44_1
+>45_1
+>46_1
+>47_1
+>48_1
+>49_1
+>4_1
+>50_1
+>51_1
+>52_1
+>53_1
+>54_1
+>55_1
+>56_1
+>57_1
+>58_1
+>59_1
+>5_1
+>60_1
+>61_1
+>62_1
+>63_1
+>64_1
+>65_1
+>66_1
+>67_1
+>68_1
+>69_1
+>6_1
+>70_1
+>71_1
+>72_1
+>73_1
+>74_1
+>75_1
+>76_1
+>77_1
+>78_1
+>79_1
+>7_1
+>80_1
+>81_1
+>82_1
+>83_1
+>84_1
+>85_1
+>86_1
+>87_1
+>88_1
+>89_1
+>8_1
+>90_1
+>91_1
+>92_1
+>93_1
+>94_1
+>95_1
+>96_1
+>97_1
+>98_1
+>99_1
+>9_1
+AAAAAATATCTTTTTTAACTCGTGGCC
+AAAAACACACAGACACAAGCAGCAAT
+AAAACCCGGACAAACCATCGGAGGA
+AAAAGGAAAAACAGAAAAATTGGG
+AAAATCGACTGCCGAAAACATTTTAA
+AAAGACAAAAGAAATACAGGCACT
+AAATTGCAAAGATGGAAAATAAAACT
+AACAACTGCAAACATCTACCACA
+AACAGGAAAAACAGAAGGATTTCTA
+AACAGGGAGATCAACAGCGTTGACA
+AACATTTTATCAATTATACATTA
+AACGGGGAATAAGGGTTCG
+AACTCTCTAATTTAACTTTGTGC
+AACTTTAAATTTTTAATAACCTT
+AAGACAACAATGACATATAAGACG
+AAGATGGAGTAGTTTTTT
+AAGCACGCCTTACCACAATTTATAA
+AAGGAATTAAAGCAATAATTCTAA
+AAGTGAAGAAGTAGTTTTT
+AATATAAAAATACAATCAACCATTGCA
+AATCTACAATTTCCATTACGACTCC
+AATGACACACTCTTCATCAAC
+AATGTCACTTGAAGAATTCACGT
+AATTGCAACAGAGACTGGAA
+AATTTATTTAATTTATATTCTAACTAA
+ACAAATCATAAATTTTTTTTTACT
+ACAACCTCAACTCATATTT
+ACAGCAGGACGGTGATCA
+ACCAGCACCTTCCGACTCAACGTCAAA
+ACTAAACTTTTCTTACCATATTTCTA
+AGAACAATTAAATAAAATAGCATA
+AGATGTTGATCTAAACTCTCCCA
+AGGAATATGATGAAATAAAAAAAT
+AGGATTTTTAAGCCCATATGTTTCC
+ATAAAGCTAGATTACCAAAGCAT
+ATGAAATTCGAACAATACGTC
+ATGTTATTTACTTTTTCCCCTTATA
+ATTATTAATAAATTATTATAA
+ATTTTACTTCATCATTTTC
+CAAACGGAACAAGACATCACCATC
+CAAATAACAAACTGAATAAACGAAA
+CAACACATGACGCGACAATTCTTG
+CAAGAATACAAAAAATACTAATTA
+CAAGATATGAACAAAGCAAAGACAC
+CAAGGCTCAGAAGAACATCACCAAGACC
+CAATTTTTAATTCCTTTTTTCTTCTT
+CACAGACTGAGGCAGAAAAAACAA
+CACCGAACCGGGAAGGCGAACAAC
+CACGACTTTATTCTTTTTATCTCA
+CAGATTCACTGATTTTCTTACGCC
+CAGTTTCACAAAAGATCTTTTAA
+CATAATATAAACTTATCTT
+CATACCTACAAAAAAGCTTCTCTTAC
+CATACCTTAAATTATCTCTTTCTT
+CATTAATTCATCCATTTAAACTAA
+CCAGAAAACAATACAACATCCTCA
+CCATATATGACTGACTCATTTCAC
+CCCGAAAAGCCGAGGACGACTTA
+CCGACCGAGCAAATAAACACAGGAACG
+CCGCGATCTGCTTATTTATAATCTT
+CCTTACTCAACATACTTAATCATACTTA
+CGATATTTTCTCCTCGTACC
+CGATATTTTCTCCTCTGACC
+CGATATTTTCTCCTCTTACC
+CGATATTTTCTCCTCTTACCT
+CGCAACCAGCAGCAACTCCTAGCAT
+CGCCGCAAGATGAATACTCTAATGA
+CGGAAAAGAATGTAGACCATTTAA
+CGGACGGTATATTTTTTAATATAA
+CGGCACATGTTGAATTACACTCA
+CTAGACAAGATGCTATAAATTTTAAA
+CTATATTTTCTCTCTTACC
+CTGGAAACTATTGATCAAATT
+CTTCTTTTACTACATATTTTTTATTTTTTTA
+GAAACAAACAACACATACCCTCTGGC
+GAAACCATTATCTTATCTTTATACA
+GAAAGGAAGGGAAGAAAGCGAAAGGA
+GAACAATTTTTCAATTTTTTACATTA
+GAAGAGGAGGAGGAGTTTGTAAG
+GAATGATCGCACCACCACCTCAACGTT
+GACGAAACGCAACAACAAAATGGACG
+GAGAACTTTTAATCATTTTAC
+GAGAATAAATATTTCAATGGTCTATTG
+GAGGATTAAAAGAACGGTTTATAA
+GCAGATAGAAATCAATACAAAAATC
+GCCAACGACCATACCACGA
+GCCGGGGCGTGAGATGTCTGCATTA
+GGACGGAGAACTGATAAGGGCA
+GGACGGAGAACTGATAATGGC
+GGAGATTGTAGAACGAAAGGAAAAT
+GGTATCTTTATATTTTAATTTTCTT
+NATTCTTACTCCATTTCAATTTACT
+TAAAAATAATTGTCTTTAATTTCA
+TAAAATAAACCAAAACCCAAAAAT
+TAAAATAAATAAGTCCGACGACAA
+TAAAATTGTAATATTTAAATAATAT
+TAAAGAAGAAGAATTGATTTTAAT
+TAACAAATAGAACGTTCTAATTTAAA
+TAACATAAATTTTAATCATAAATTG
+TAACCTTGCAGAACTATACGATTCAAA
+TAACGGAGCACGAGAACGAAGTGG
+TAACTACTTTTACATTAATACTAA
+TAACTTTAACTTTTTTACT
+TAAGAAACTGAGCTAACGCAATGTACC
+TAAGTTTTTAATCATTTTTTTT
+TAATAAATTATTAAATAAAAAAAAAA
+TAATAAGAAACTGTTCAAACAATCCAC
+TAATAATTTAAATAAATATAAATTT
+TAATCAAAAAACTCTTCATTTTTA
+TAATTACCATTGCTAACTATCCA
+TACAAAAAATGCGAAAATTGACCCT
+TACAAAACAAACAAATTACAATCTAAA
+TACAAACGGAACTTTCGTCATAA
+TACAAACGGAACTTTCTTCATAACTTC
+TACAAACGTAATTTTCGCATAACATC
+TACAAATTTTTTTTTCTTTCTTAT
+TACAACTAACATCCTTTCTTCTTCC
+TACAAGACTAAAACAAACGTGAAGT
+TACACACTCATCAACCAAAGGACG
+TACACACTCATCAACCAAAGTACGTA
+TACACAGACTTACAAAACACATCCTTC
+TACACCTCTTTTTACTTTTTTATT
+TACAGACAACACATACGGACTTAA
+TACAGACAACACATACGGACTTAAGT
+TACAGAGAAATATACAACACTCACC
+TACCTCTTTATTAACCTCCACCTCTA
+TACTATATACTTCTTCAAATCACA
+TACTATTTTATTATACATACATACATTA
+TACTCAATAGAACTCTACTCACTCATA
+TACTGAAAACGGGCGCATATCAGTGG
+TACTGAAACAAGGAAACACAAGC
+TACTTTTTTCTTAATTTTTTATTAAAC
+TAGAACTCGAACCAGAGCTCC
+TAGACTTTCTACTCATTATTAC
+TAGCCTTTACTAGGCTTTTTCTAA
+TAGCGAGATGGACCAACGTGCTGT
+TAGGAACTTCATACCGGTCTC
+TAGGTACTTACCTTTTTTTTACACAA
+TAGTCATACATACCTAATTATACATA
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA
+TAGTTACCTTCATATCTCTCTTTA
+TAGTTTCACTACTTTATTCTTTTTA
+TAGTTTTAAATATTTCTTTTTTTC
+TATAATTTTATTTTATATTTTCTCT
+TATATAAATCTTCAACATCAA
+TATATTGCCTCCCCATAATCCTT
+TATCTATCTTTGATCTTCTTTTCA
+TATCTGATCAACAATCTTTTCCCAT
+TATGGAGAAACAGCGATATAAGTCA
+TATGGATTATTTCAAAATTTTTTTTT
+TATTATACATAGAATAACAAATCTTT
+TATTATCATCTCGTTCTTCCTTCTC
+TATTATTTTTTTATTCCATTCATAT
+TATTCAATCACTCCATTATATATAACA
+TATTCCGACAATACCTTCTTTAC
+TATTTATAAATTTTTTCTTGAGAC
+TATTTCACTTTATACTTCCTTAA
+TATTTCAGCAACAGACTAAGACTAA
+TATTTCCAACCTTCAACCTCAAATAA
+TATTTCTTTTTTAACTTCTTTTC
+TCAAAAAGTAATAGGGATCGTTA
+TCAAAGAACAATGTAAAGCCGCGAC
+TCAAATGCAAATTGGATTTATGA
+TCAACTGGCAAGAATTTTTGAAAATT
+TCAAGCCTTTTGAAGAACTGACCTAAA
+TCAGAAGAACAGAGAATTGATTTT
+TCAGAGTTCTACAGGTCCTACGATT
+TCAGTCTTTTTTTCTCTCCTA
+TCATTACACTTCTTACAAAAC
+TCCACAACAACTCTATCTAAAGCATT
+TCCACCTATTTATCTTTTCTT
+TCCCAACCCTCGAGCATCATTTTC
+TCCGAAAAATCGTAGGACCCGGGCA
+TCCGAAAACAAGGCCCGTCGCT
+TCCTGAGGACGAGGGGCGTTTAGC
+TCGCCGTAAAGCCAGTCGTTCTCC
+TCTAAACACCCACGAAAATCTCTTAC
+TCTAGTCTGAGCGTAGTACCAGATTG
+TCTATATTATTTTTATCAATTTTCACC
+TCTATTCATACAAAACACTAATACCC
+TCTATTTCTTTATTTTTTTTATTAT
+TCTCATCTTACAATTTTTTAAAACTT
+TCTCTTTTATTTTTATCTTTCCTT
+TCTGTATTTGACTTATTACTTTCTCC
+TCTTATTATCATTTTTTTATCCCTT
+TCTTATTTTAATCTTCCAATTTC
+TCTTCTATATAATCCTTTATTATAA
+TCTTTTTTTTAATACTTATTTTCATT
+TGAAACTGAAACTAACATACAAAATATT
+TGAAAGGAAAAACAGGACACGGGA
+TGACCAAAGACAAACAAACAATAAATA
+TGAGAATGACTTCTTCACGATCTCTT
+TGAGCGGAGAACCAGAGTTGATGAGC
+TGAGCTAGAACTGCACCCACTCCA
+TGAGGAAAACAGAAAAATGAGAGACA
+TGATGACGGGCAGCAGGGATTTTC
+TGATTTACTTACATTCTTTTTTT
+TGCTTGGACTACATATGGTTGAGGG
+TGCTTGGACTACATATGGTTGAGTG
+TGCTTTTACTACATATTTTTTATTTTTTTA
+TGGAATGTAAAGAAGTATGGAG
+TGGACAAGAACCACGCGACGGGTGT
+TGGACGGAGAACTGATAAGGGC
+TGGAGTAGCACAGTCGTCTGAAATC
+TGTGTAATCTTTCTACTTCTTCTAC
+TTAAACAATTTGGAATTAATT
+TTAAAGACGCAACAACTAACATT
+TTAAGTTTTAGACATAATCTATTACAA
+TTAATATGTAATTTCATACCTCAC
+TTAATGACACACGGGAAAAACACCG
+TTACAATCTACTATTCTTTTATTA
+TTACTAGATCCACCCTCATTA
+TTAGTATTAATCTTCACTTAA
+TTATAATCACGGCACCCTATACA
+TTATTACATCGTCCACATATAACAAAA
+TTATTATCTATTTTAATTTTTCTTAA
+TTCAAGTAGATTGCATTTTTTAATA
+TTCATTTTATTTTTAAATATCTTTTTT
+TTCTAGCACAACACGCACACATATA
+TTCTTCCTTTTATCCTCTCTTAA
+TTCTTGATAACGCATCTTCTACAT
+TTCTTGGACTACACATTTTTTATTGTTTTA
+TTCTTTGACTACATATTTTTTATT
+TTCTTTTACTACATATTGTTTATTTTTTTA
+TTCTTTTACTACATATTTTTTATTTTT
+TTCTTTTACTACATATTTTTTATTTTTTTA
+TTGATTCTTCTTTTTCACAAAA
+TTGTAAAACATTCTTTCTCCTGAC
+TTTAAACACTTCCTACATCAAATTTC
+TTTAAGACTTATGAGCTTG
+TTTACCAGAGGAGTCGAGTTTTT
+TTTATTACAACCCTATCTTACCTCAA
+TTTATTACTTAGTCATAATTCCAA
+TTTCCGACAAATACACCATCTTC
+TTTGATACCTTTATACCATACCTATT
+TTTGTTTTTCATTTTTTTATCTTT
+TTTGTTTTTTACTATATTT
+TTTTATATTTCCTTATATCTTTACTA
+TTTTCATTTCTTCTTCAAATCCTTT
+TTTTCTTTACCCATCTTTACTTTCCC
+TTTTCTTTTTTTCATTTTCTCTTTTA
+TTTTTAACTCATTTTACAATTAAAC
+TTTTTAACTCCCATCATTTTTCCTC
+TTTTTATCAATTTTCACCATTC
+TTTTTTATCAATTTTCACCATTCAT
+TTTTTTATCATTTTTCACCTAAAAAA
+TTTTTTTTTTGTTTTTATTTTTATCAT
diff -r 000000000000 -r a8aacccd79a3 test-data/output.sorted.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.sorted.tab Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,247 @@
+AAAAAATATCTTTTTTAACTCGTGGCC 1
+AAAAACACACAGACACAAGCAGCAAT 1
+AAAACCCGGACAAACCATCGGAGGA 1
+AAAAGGAAAAACAGAAAAATTGGG 1
+AAAATCGACTGCCGAAAACATTTTAA 1
+AAAGACAAAAGAAATACAGGCACT 1
+AAATTGCAAAGATGGAAAATAAAACT 1
+AACAACTGCAAACATCTACCACA 1
+AACAGGAAAAACAGAAGGATTTCTA 1
+AACAGGGAGATCAACAGCGTTGACA 1
+AACATTTTATCAATTATACATTA 1
+AACGGGGAATAAGGGTTCG 1
+AACTCTCTAATTTAACTTTGTGC 1
+AACTTTAAATTTTTAATAACCTT 1
+AAGACAACAATGACATATAAGACG 1
+AAGATGGAGTAGTTTTTT 1
+AAGCACGCCTTACCACAATTTATAA 1
+AAGGAATTAAAGCAATAATTCTAA 1
+AAGTGAAGAAGTAGTTTTT 1
+AATATAAAAATACAATCAACCATTGCA 1
+AATCTACAATTTCCATTACGACTCC 1
+AATGACACACTCTTCATCAAC 1
+AATGTCACTTGAAGAATTCACGT 1
+AATTGCAACAGAGACTGGAA 1
+AATTTATTTAATTTATATTCTAACTAA 1
+ACAAATCATAAATTTTTTTTTACT 1
+ACAACCTCAACTCATATTT 1
+ACAGCAGGACGGTGATCA 2
+ACCAGCACCTTCCGACTCAACGTCAAA 1
+ACTAAACTTTTCTTACCATATTTCTA 1
+AGAACAATTAAATAAAATAGCATA 1
+AGATGTTGATCTAAACTCTCCCA 1
+AGGAATATGATGAAATAAAAAAAT 1
+AGGATTTTTAAGCCCATATGTTTCC 1
+ATAAAGCTAGATTACCAAAGCAT 1
+ATGAAATTCGAACAATACGTC 1
+ATGTTATTTACTTTTTCCCCTTATA 1
+ATTATTAATAAATTATTATAA 1
+ATTTTACTTCATCATTTTC 1
+CAAACGGAACAAGACATCACCATC 1
+CAAATAACAAACTGAATAAACGAAA 1
+CAACACATGACGCGACAATTCTTG 1
+CAAGAATACAAAAAATACTAATTA 1
+CAAGATATGAACAAAGCAAAGACAC 1
+CAAGGCTCAGAAGAACATCACCAAGACC 1
+CAATTTTTAATTCCTTTTTTCTTCTT 1
+CACAGACTGAGGCAGAAAAAACAA 1
+CACCGAACCGGGAAGGCGAACAAC 1
+CACGACTTTATTCTTTTTATCTCA 1
+CAGATTCACTGATTTTCTTACGCC 1
+CAGTTTCACAAAAGATCTTTTAA 1
+CATAATATAAACTTATCTT 1
+CATACCTACAAAAAAGCTTCTCTTAC 1
+CATACCTTAAATTATCTCTTTCTT 1
+CATTAATTCATCCATTTAAACTAA 1
+CCAGAAAACAATACAACATCCTCA 1
+CCATATATGACTGACTCATTTCAC 1
+CCCGAAAAGCCGAGGACGACTTA 1
+CCGACCGAGCAAATAAACACAGGAACG 1
+CCGCGATCTGCTTATTTATAATCTT 1
+CCTTACTCAACATACTTAATCATACTTA 1
+CGATATTTTCTCCTCGTACC 1
+CGATATTTTCTCCTCTGACC 1
+CGATATTTTCTCCTCTTACC 1
+CGATATTTTCTCCTCTTACCT 1
+CGCAACCAGCAGCAACTCCTAGCAT 1
+CGCCGCAAGATGAATACTCTAATGA 1
+CGGAAAAGAATGTAGACCATTTAA 1
+CGGACGGTATATTTTTTAATATAA 1
+CGGCACATGTTGAATTACACTCA 1
+CTAGACAAGATGCTATAAATTTTAAA 1
+CTATATTTTCTCTCTTACC 1
+CTGGAAACTATTGATCAAATT 1
+CTTCTTTTACTACATATTTTTTATTTTTTTA 1
+GAAACAAACAACACATACCCTCTGGC 1
+GAAACCATTATCTTATCTTTATACA 1
+GAAAGGAAGGGAAGAAAGCGAAAGGA 1
+GAACAATTTTTCAATTTTTTACATTA 1
+GAAGAGGAGGAGGAGTTTGTAAG 1
+GAATGATCGCACCACCACCTCAACGTT 1
+GACGAAACGCAACAACAAAATGGACG 1
+GAGAACTTTTAATCATTTTAC 1
+GAGAATAAATATTTCAATGGTCTATTG 1
+GAGGATTAAAAGAACGGTTTATAA 1
+GCAGATAGAAATCAATACAAAAATC 1
+GCCAACGACCATACCACGA 1
+GCCGGGGCGTGAGATGTCTGCATTA 1
+GGACGGAGAACTGATAAGGGCA 1
+GGACGGAGAACTGATAATGGC 1
+GGAGATTGTAGAACGAAAGGAAAAT 1
+GGTATCTTTATATTTTAATTTTCTT 1
+NATTCTTACTCCATTTCAATTTACT 1
+TAAAAATAATTGTCTTTAATTTCA 1
+TAAAATAAACCAAAACCCAAAAAT 1
+TAAAATAAATAAGTCCGACGACAA 1
+TAAAATTGTAATATTTAAATAATAT 1
+TAAAGAAGAAGAATTGATTTTAAT 1
+TAACAAATAGAACGTTCTAATTTAAA 1
+TAACATAAATTTTAATCATAAATTG 1
+TAACCTTGCAGAACTATACGATTCAAA 1
+TAACGGAGCACGAGAACGAAGTGG 1
+TAACTACTTTTACATTAATACTAA 1
+TAACTTTAACTTTTTTACT 1
+TAAGAAACTGAGCTAACGCAATGTACC 1
+TAAGTTTTTAATCATTTTTTTT 1
+TAATAAATTATTAAATAAAAAAAAAA 1
+TAATAAGAAACTGTTCAAACAATCCAC 1
+TAATAATTTAAATAAATATAAATTT 1
+TAATCAAAAAACTCTTCATTTTTA 1
+TAATTACCATTGCTAACTATCCA 1
+TACAAAAAATGCGAAAATTGACCCT 1
+TACAAAACAAACAAATTACAATCTAAA 1
+TACAAACGGAACTTTCGTCATAA 1
+TACAAACGGAACTTTCTTCATAACTTC 1
+TACAAACGTAATTTTCGCATAACATC 1
+TACAAATTTTTTTTTCTTTCTTAT 1
+TACAACTAACATCCTTTCTTCTTCC 1
+TACAAGACTAAAACAAACGTGAAGT 1
+TACACACTCATCAACCAAAGGACG 1
+TACACACTCATCAACCAAAGTACGTA 1
+TACACAGACTTACAAAACACATCCTTC 1
+TACACCTCTTTTTACTTTTTTATT 1
+TACAGACAACACATACGGACTTAA 1
+TACAGACAACACATACGGACTTAAGT 1
+TACAGAGAAATATACAACACTCACC 1
+TACCTCTTTATTAACCTCCACCTCTA 1
+TACTATATACTTCTTCAAATCACA 1
+TACTATTTTATTATACATACATACATTA 1
+TACTCAATAGAACTCTACTCACTCATA 1
+TACTGAAAACGGGCGCATATCAGTGG 1
+TACTGAAACAAGGAAACACAAGC 1
+TACTTTTTTCTTAATTTTTTATTAAAC 1
+TAGAACTCGAACCAGAGCTCC 1
+TAGACTTTCTACTCATTATTAC 1
+TAGCCTTTACTAGGCTTTTTCTAA 1
+TAGCGAGATGGACCAACGTGCTGT 1
+TAGGAACTTCATACCGGTCTC 1
+TAGGTACTTACCTTTTTTTTACACAA 1
+TAGTCATACATACCTAATTATACATA 1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1
+TAGTTACCTTCATATCTCTCTTTA 1
+TAGTTTCACTACTTTATTCTTTTTA 1
+TAGTTTTAAATATTTCTTTTTTTC 1
+TATAATTTTATTTTATATTTTCTCT 1
+TATATAAATCTTCAACATCAA 1
+TATATTGCCTCCCCATAATCCTT 1
+TATCTATCTTTGATCTTCTTTTCA 1
+TATCTGATCAACAATCTTTTCCCAT 1
+TATGGAGAAACAGCGATATAAGTCA 1
+TATGGATTATTTCAAAATTTTTTTTT 1
+TATTATACATAGAATAACAAATCTTT 1
+TATTATCATCTCGTTCTTCCTTCTC 1
+TATTATTTTTTTATTCCATTCATAT 1
+TATTCAATCACTCCATTATATATAACA 1
+TATTCCGACAATACCTTCTTTAC 1
+TATTTATAAATTTTTTCTTGAGAC 1
+TATTTCACTTTATACTTCCTTAA 1
+TATTTCAGCAACAGACTAAGACTAA 1
+TATTTCCAACCTTCAACCTCAAATAA 1
+TATTTCTTTTTTAACTTCTTTTC 1
+TCAAAAAGTAATAGGGATCGTTA 1
+TCAAAGAACAATGTAAAGCCGCGAC 1
+TCAAATGCAAATTGGATTTATGA 1
+TCAACTGGCAAGAATTTTTGAAAATT 1
+TCAAGCCTTTTGAAGAACTGACCTAAA 1
+TCAGAAGAACAGAGAATTGATTTT 1
+TCAGAGTTCTACAGGTCCTACGATT 1
+TCAGTCTTTTTTTCTCTCCTA 1
+TCATTACACTTCTTACAAAAC 1
+TCCACAACAACTCTATCTAAAGCATT 1
+TCCACCTATTTATCTTTTCTT 1
+TCCCAACCCTCGAGCATCATTTTC 1
+TCCGAAAAATCGTAGGACCCGGGCA 1
+TCCGAAAACAAGGCCCGTCGCT 1
+TCCTGAGGACGAGGGGCGTTTAGC 1
+TCGCCGTAAAGCCAGTCGTTCTCC 1
+TCTAAACACCCACGAAAATCTCTTAC 1
+TCTAGTCTGAGCGTAGTACCAGATTG 1
+TCTATATTATTTTTATCAATTTTCACC 1
+TCTATTCATACAAAACACTAATACCC 1
+TCTATTTCTTTATTTTTTTTATTAT 1
+TCTCATCTTACAATTTTTTAAAACTT 1
+TCTCTTTTATTTTTATCTTTCCTT 1
+TCTGTATTTGACTTATTACTTTCTCC 1
+TCTTATTATCATTTTTTTATCCCTT 1
+TCTTATTTTAATCTTCCAATTTC 1
+TCTTCTATATAATCCTTTATTATAA 1
+TCTTTTTTTTAATACTTATTTTCATT 1
+TGAAACTGAAACTAACATACAAAATATT 1
+TGAAAGGAAAAACAGGACACGGGA 1
+TGACCAAAGACAAACAAACAATAAATA 1
+TGAGAATGACTTCTTCACGATCTCTT 1
+TGAGCGGAGAACCAGAGTTGATGAGC 1
+TGAGCTAGAACTGCACCCACTCCA 1
+TGAGGAAAACAGAAAAATGAGAGACA 1
+TGATGACGGGCAGCAGGGATTTTC 1
+TGATTTACTTACATTCTTTTTTT 1
+TGCTTGGACTACATATGGTTGAGGG 1
+TGCTTGGACTACATATGGTTGAGTG 2
+TGCTTTTACTACATATTTTTTATTTTTTTA 1
+TGGAATGTAAAGAAGTATGGAG 2
+TGGACAAGAACCACGCGACGGGTGT 1
+TGGACGGAGAACTGATAAGGGC 1
+TGGAGTAGCACAGTCGTCTGAAATC 1
+TGTGTAATCTTTCTACTTCTTCTAC 1
+TTAAACAATTTGGAATTAATT 1
+TTAAAGACGCAACAACTAACATT 1
+TTAAGTTTTAGACATAATCTATTACAA 1
+TTAATATGTAATTTCATACCTCAC 1
+TTAATGACACACGGGAAAAACACCG 1
+TTACAATCTACTATTCTTTTATTA 1
+TTACTAGATCCACCCTCATTA 1
+TTAGTATTAATCTTCACTTAA 1
+TTATAATCACGGCACCCTATACA 1
+TTATTACATCGTCCACATATAACAAAA 1
+TTATTATCTATTTTAATTTTTCTTAA 1
+TTCAAGTAGATTGCATTTTTTAATA 1
+TTCATTTTATTTTTAAATATCTTTTTT 1
+TTCTAGCACAACACGCACACATATA 1
+TTCTTCCTTTTATCCTCTCTTAA 1
+TTCTTGATAACGCATCTTCTACAT 1
+TTCTTGGACTACACATTTTTTATTGTTTTA 1
+TTCTTTGACTACATATTTTTTATT 1
+TTCTTTTACTACATATTGTTTATTTTTTTA 1
+TTCTTTTACTACATATTTTTTATTTTT 1
+TTCTTTTACTACATATTTTTTATTTTTTTA 1
+TTGATTCTTCTTTTTCACAAAA 1
+TTGTAAAACATTCTTTCTCCTGAC 1
+TTTAAACACTTCCTACATCAAATTTC 1
+TTTAAGACTTATGAGCTTG 1
+TTTACCAGAGGAGTCGAGTTTTT 1
+TTTATTACAACCCTATCTTACCTCAA 1
+TTTATTACTTAGTCATAATTCCAA 1
+TTTCCGACAAATACACCATCTTC 1
+TTTGATACCTTTATACCATACCTATT 1
+TTTGTTTTTCATTTTTTTATCTTT 1
+TTTGTTTTTTACTATATTT 1
+TTTTATATTTCCTTATATCTTTACTA 1
+TTTTCATTTCTTCTTCAAATCCTTT 1
+TTTTCTTTACCCATCTTTACTTTCCC 1
+TTTTCTTTTTTTCATTTTCTCTTTTA 1
+TTTTTAACTCATTTTACAATTAAAC 1
+TTTTTAACTCCCATCATTTTTCCTC 1
+TTTTTATCAATTTTCACCATTC 1
+TTTTTTATCAATTTTCACCATTCAT 1
+TTTTTTATCATTTTTCACCTAAAAAA 1
+TTTTTTTTTTGTTTTTATTTTTATCAT 1
diff -r 000000000000 -r a8aacccd79a3 test-data/output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tab Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,247 @@
+ACAGCAGGACGGTGATCA 2
+TGCTTGGACTACATATGGTTGAGTG 2
+TGGAATGTAAAGAAGTATGGAG 2
+AATTTATTTAATTTATATTCTAACTAA 1
+TTATAATCACGGCACCCTATACA 1
+TCCGAAAAATCGTAGGACCCGGGCA 1
+TCCCAACCCTCGAGCATCATTTTC 1
+TTGATTCTTCTTTTTCACAAAA 1
+AGAACAATTAAATAAAATAGCATA 1
+CCAGAAAACAATACAACATCCTCA 1
+TCTAGTCTGAGCGTAGTACCAGATTG 1
+TTTTTAACTCCCATCATTTTTCCTC 1
+GCAGATAGAAATCAATACAAAAATC 1
+AAGTGAAGAAGTAGTTTTT 1
+AATGTCACTTGAAGAATTCACGT 1
+TTTACCAGAGGAGTCGAGTTTTT 1
+GGTATCTTTATATTTTAATTTTCTT 1
+TTACTAGATCCACCCTCATTA 1
+TATATAAATCTTCAACATCAA 1
+GAAACCATTATCTTATCTTTATACA 1
+GGAGATTGTAGAACGAAAGGAAAAT 1
+ACTAAACTTTTCTTACCATATTTCTA 1
+TCAAGCCTTTTGAAGAACTGACCTAAA 1
+TAACATAAATTTTAATCATAAATTG 1
+TAGCGAGATGGACCAACGTGCTGT 1
+TACAAACGTAATTTTCGCATAACATC 1
+CGGAAAAGAATGTAGACCATTTAA 1
+GAAAGGAAGGGAAGAAAGCGAAAGGA 1
+CAAGAATACAAAAAATACTAATTA 1
+CATACCTACAAAAAAGCTTCTCTTAC 1
+TAAAATAAATAAGTCCGACGACAA 1
+AGGAATATGATGAAATAAAAAAAT 1
+TTTTTTTTTTGTTTTTATTTTTATCAT 1
+TTTTTAACTCATTTTACAATTAAAC 1
+CCGCGATCTGCTTATTTATAATCTT 1
+TAGGTACTTACCTTTTTTTTACACAA 1
+TCATTACACTTCTTACAAAAC 1
+TATTATACATAGAATAACAAATCTTT 1
+ATGTTATTTACTTTTTCCCCTTATA 1
+TCCGAAAACAAGGCCCGTCGCT 1
+TGCTTTTACTACATATTTTTTATTTTTTTA 1
+GAACAATTTTTCAATTTTTTACATTA 1
+AACATTTTATCAATTATACATTA 1
+TATCTGATCAACAATCTTTTCCCAT 1
+AATTGCAACAGAGACTGGAA 1
+TATTCAATCACTCCATTATATATAACA 1
+CAAACGGAACAAGACATCACCATC 1
+TCTTCTATATAATCCTTTATTATAA 1
+TGATGACGGGCAGCAGGGATTTTC 1
+TTTATTACAACCCTATCTTACCTCAA 1
+AACAGGAAAAACAGAAGGATTTCTA 1
+CGATATTTTCTCCTCGTACC 1
+AAATTGCAAAGATGGAAAATAAAACT 1
+TACACACTCATCAACCAAAGGACG 1
+TATATTGCCTCCCCATAATCCTT 1
+TACAAACGGAACTTTCTTCATAACTTC 1
+GGACGGAGAACTGATAATGGC 1
+TTCTTTGACTACATATTTTTTATT 1
+TCTTTTTTTTAATACTTATTTTCATT 1
+TACTTTTTTCTTAATTTTTTATTAAAC 1
+AATATAAAAATACAATCAACCATTGCA 1
+CAGTTTCACAAAAGATCTTTTAA 1
+GAAACAAACAACACATACCCTCTGGC 1
+AATGACACACTCTTCATCAAC 1
+AAGATGGAGTAGTTTTTT 1
+ACAACCTCAACTCATATTT 1
+TTAAACAATTTGGAATTAATT 1
+TACAAAAAATGCGAAAATTGACCCT 1
+GGACGGAGAACTGATAAGGGCA 1
+TTAAGTTTTAGACATAATCTATTACAA 1
+TTATTATCTATTTTAATTTTTCTTAA 1
+CACCGAACCGGGAAGGCGAACAAC 1
+TCCACCTATTTATCTTTTCTT 1
+TCGCCGTAAAGCCAGTCGTTCTCC 1
+TCAAAGAACAATGTAAAGCCGCGAC 1
+TGGACAAGAACCACGCGACGGGTGT 1
+CCCGAAAAGCCGAGGACGACTTA 1
+TTTTTTATCATTTTTCACCTAAAAAA 1
+TAGAACTCGAACCAGAGCTCC 1
+TCTATATTATTTTTATCAATTTTCACC 1
+TCTATTTCTTTATTTTTTTTATTAT 1
+TTTGATACCTTTATACCATACCTATT 1
+ATAAAGCTAGATTACCAAAGCAT 1
+GCCAACGACCATACCACGA 1
+CGGCACATGTTGAATTACACTCA 1
+TACTATTTTATTATACATACATACATTA 1
+TTAATGACACACGGGAAAAACACCG 1
+TAGTTTCACTACTTTATTCTTTTTA 1
+AACAGGGAGATCAACAGCGTTGACA 1
+CGATATTTTCTCCTCTGACC 1
+ACCAGCACCTTCCGACTCAACGTCAAA 1
+AAGGAATTAAAGCAATAATTCTAA 1
+TACAAAACAAACAAATTACAATCTAAA 1
+CAATTTTTAATTCCTTTTTTCTTCTT 1
+TACAGACAACACATACGGACTTAA 1
+TCTGTATTTGACTTATTACTTTCTCC 1
+TGAGCTAGAACTGCACCCACTCCA 1
+CGCCGCAAGATGAATACTCTAATGA 1
+TATTTCTTTTTTAACTTCTTTTC 1
+TTACAATCTACTATTCTTTTATTA 1
+TTTAAACACTTCCTACATCAAATTTC 1
+TGTGTAATCTTTCTACTTCTTCTAC 1
+TCTATTCATACAAAACACTAATACCC 1
+TGGAGTAGCACAGTCGTCTGAAATC 1
+AAGCACGCCTTACCACAATTTATAA 1
+CTGGAAACTATTGATCAAATT 1
+TACACAGACTTACAAAACACATCCTTC 1
+TTCAAGTAGATTGCATTTTTTAATA 1
+TTATTACATCGTCCACATATAACAAAA 1
+CAAGGCTCAGAAGAACATCACCAAGACC 1
+TGAGGAAAACAGAAAAATGAGAGACA 1
+TCAAAAAGTAATAGGGATCGTTA 1
+TAACTTTAACTTTTTTACT 1
+TATTCCGACAATACCTTCTTTAC 1
+TTTGTTTTTTACTATATTT 1
+TTCATTTTATTTTTAAATATCTTTTTT 1
+TACTCAATAGAACTCTACTCACTCATA 1
+TGAAAGGAAAAACAGGACACGGGA 1
+AAAATCGACTGCCGAAAACATTTTAA 1
+TACAGAGAAATATACAACACTCACC 1
+TCAACTGGCAAGAATTTTTGAAAATT 1
+GAGAACTTTTAATCATTTTAC 1
+TATTATCATCTCGTTCTTCCTTCTC 1
+TTTTCATTTCTTCTTCAAATCCTTT 1
+TAGTCATACATACCTAATTATACATA 1
+ATTTTACTTCATCATTTTC 1
+TCTCTTTTATTTTTATCTTTCCTT 1
+GCCGGGGCGTGAGATGTCTGCATTA 1
+AGGATTTTTAAGCCCATATGTTTCC 1
+CAAGATATGAACAAAGCAAAGACAC 1
+CAACACATGACGCGACAATTCTTG 1
+CAAATAACAAACTGAATAAACGAAA 1
+TGAGAATGACTTCTTCACGATCTCTT 1
+TCTTATTATCATTTTTTTATCCCTT 1
+TCAAATGCAAATTGGATTTATGA 1
+CCTTACTCAACATACTTAATCATACTTA 1
+TAGACTTTCTACTCATTATTAC 1
+TGAAACTGAAACTAACATACAAAATATT 1
+AAAACCCGGACAAACCATCGGAGGA 1
+TACAGACAACACATACGGACTTAAGT 1
+TATTTCAGCAACAGACTAAGACTAA 1
+AACTTTAAATTTTTAATAACCTT 1
+TATTTATAAATTTTTTCTTGAGAC 1
+TTAATATGTAATTTCATACCTCAC 1
+CACAGACTGAGGCAGAAAAAACAA 1
+TAAAGAAGAAGAATTGATTTTAAT 1
+TACTGAAAACGGGCGCATATCAGTGG 1
+TCAGTCTTTTTTTCTCTCCTA 1
+TATAATTTTATTTTATATTTTCTCT 1
+NATTCTTACTCCATTTCAATTTACT 1
+TTGTAAAACATTCTTTCTCCTGAC 1
+TAATTACCATTGCTAACTATCCA 1
+TTCTTCCTTTTATCCTCTCTTAA 1
+TCTAAACACCCACGAAAATCTCTTAC 1
+AAAAACACACAGACACAAGCAGCAAT 1
+CGGACGGTATATTTTTTAATATAA 1
+TATGGAGAAACAGCGATATAAGTCA 1
+TACAACTAACATCCTTTCTTCTTCC 1
+AACTCTCTAATTTAACTTTGTGC 1
+TCCTGAGGACGAGGGGCGTTTAGC 1
+TATTTCCAACCTTCAACCTCAAATAA 1
+TGGACGGAGAACTGATAAGGGC 1
+TTTAAGACTTATGAGCTTG 1
+TTAAAGACGCAACAACTAACATT 1
+TAGGAACTTCATACCGGTCTC 1
+CGATATTTTCTCCTCTTACC 1
+GAGGATTAAAAGAACGGTTTATAA 1
+GAATGATCGCACCACCACCTCAACGTT 1
+TTTTCTTTACCCATCTTTACTTTCCC 1
+AAGACAACAATGACATATAAGACG 1
+TAATAATTTAAATAAATATAAATTT 1
+TACTGAAACAAGGAAACACAAGC 1
+TCAGAAGAACAGAGAATTGATTTT 1
+CATACCTTAAATTATCTCTTTCTT 1
+TTCTTTTACTACATATTTTTTATTTTT 1
+AAAAAATATCTTTTTTAACTCGTGGCC 1
+TAACAAATAGAACGTTCTAATTTAAA 1
+TAGTTACCTTCATATCTCTCTTTA 1
+TAAAATTGTAATATTTAAATAATAT 1
+AAAAGGAAAAACAGAAAAATTGGG 1
+AGATGTTGATCTAAACTCTCCCA 1
+TACCTCTTTATTAACCTCCACCTCTA 1
+TTTCCGACAAATACACCATCTTC 1
+ACAAATCATAAATTTTTTTTTACT 1
+GACGAAACGCAACAACAAAATGGACG 1
+TACAAATTTTTTTTTCTTTCTTAT 1
+TACACCTCTTTTTACTTTTTTATT 1
+TATGGATTATTTCAAAATTTTTTTTT 1
+TTCTAGCACAACACGCACACATATA 1
+TAACTACTTTTACATTAATACTAA 1
+TCTCATCTTACAATTTTTTAAAACTT 1
+TTCTTGGACTACACATTTTTTATTGTTTTA 1
+TACACACTCATCAACCAAAGTACGTA 1
+TACTATATACTTCTTCAAATCACA 1
+TCAGAGTTCTACAGGTCCTACGATT 1
+TGATTTACTTACATTCTTTTTTT 1
+CCATATATGACTGACTCATTTCAC 1
+GAAGAGGAGGAGGAGTTTGTAAG 1
+AAAGACAAAAGAAATACAGGCACT 1
+TACAAGACTAAAACAAACGTGAAGT 1
+TAACGGAGCACGAGAACGAAGTGG 1
+CTTCTTTTACTACATATTTTTTATTTTTTTA 1
+TAATAAGAAACTGTTCAAACAATCCAC 1
+TGAGCGGAGAACCAGAGTTGATGAGC 1
+TATTATTTTTTTATTCCATTCATAT 1
+TTTATTACTTAGTCATAATTCCAA 1
+TTTTATATTTCCTTATATCTTTACTA 1
+AACGGGGAATAAGGGTTCG 1
+AATCTACAATTTCCATTACGACTCC 1
+CCGACCGAGCAAATAAACACAGGAACG 1
+TCCACAACAACTCTATCTAAAGCATT 1
+TTCTTGATAACGCATCTTCTACAT 1
+TGCTTGGACTACATATGGTTGAGGG 1
+CAGATTCACTGATTTTCTTACGCC 1
+TTTGTTTTTCATTTTTTTATCTTT 1
+CTATATTTTCTCTCTTACC 1
+TAACCTTGCAGAACTATACGATTCAAA 1
+TAAGAAACTGAGCTAACGCAATGTACC 1
+TTCTTTTACTACATATTTTTTATTTTTTTA 1
+TATCTATCTTTGATCTTCTTTTCA 1
+TAATAAATTATTAAATAAAAAAAAAA 1
+TTTTTTATCAATTTTCACCATTCAT 1
+TATTTCACTTTATACTTCCTTAA 1
+TAGTTTTAAATATTTCTTTTTTTC 1
+TTCTTTTACTACATATTGTTTATTTTTTTA 1
+GAGAATAAATATTTCAATGGTCTATTG 1
+CGATATTTTCTCCTCTTACCT 1
+CACGACTTTATTCTTTTTATCTCA 1
+TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1
+CATAATATAAACTTATCTT 1
+ATGAAATTCGAACAATACGTC 1
+AACAACTGCAAACATCTACCACA 1
+TAAAAATAATTGTCTTTAATTTCA 1
+CGCAACCAGCAGCAACTCCTAGCAT 1
+ATTATTAATAAATTATTATAA 1
+CATTAATTCATCCATTTAAACTAA 1
+TCTTATTTTAATCTTCCAATTTC 1
+CTAGACAAGATGCTATAAATTTTAAA 1
+TGACCAAAGACAAACAAACAATAAATA 1
+TTTTTATCAATTTTCACCATTC 1
+TAAGTTTTTAATCATTTTTTTT 1
+TAATCAAAAAACTCTTCATTTTTA 1
+TACAAACGGAACTTTCGTCATAA 1
+TTTTCTTTTTTTCATTTTCTCTTTTA 1
+TAGCCTTTACTAGGCTTTTTCTAA 1
+TTAGTATTAATCTTCACTTAA 1
+TAAAATAAACCAAAACCCAAAAAT 1
diff -r 000000000000 -r a8aacccd79a3 test-data/sort.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort.py Mon Sep 04 07:13:28 2017 -0400
@@ -0,0 +1,8 @@
+import sys
+
+F = open(sys.argv[1], 'r')
+lines = F.readlines()
+lines = [line[:-1] for line in lines]
+for line in sorted(lines):
+ print(line)
+F.close()