#!/usr/bin/python
#
# Provenance (Mercurial changeset header this script was extracted from):
#   HG changeset patch
#   User drosofff
#   Date 1434911329 14400
#   Node ID 951cb6b3979b33c11889d0f62b09941cdd9accac
#   planemo upload for repository https://bitbucket.org/drosofff/gedtools/
#   diff -r 000000000000 -r 951cb6b3979b fasta_tabular_converter.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/fasta_tabular_converter.py Sun Jun 21 14:28:49 2015 -0400
#   @@ -0,0 +1,88 @@
"""Convert read collections between FASTA, tabular and "weighted" FASTA files.

Formats handled:

* FASTA: one ``>header`` line followed by one sequence line per read.
* tabular: ``sequence<TAB>count``, one distinct sequence per line.
* weighted FASTA: headers of the form ``>id_count`` where ``count`` is the
  number of reads carrying the sequence that follows.

Usage::

    fasta_tabular_converter.py INPUT OUTPUT CONVERSION

where CONVERSION is one of ``fasta2tabular``, ``tabular2fasta``,
``tabular2fastaweight``, ``fastaweight2fastaweight`` or ``fastaweight2fasta``.

The code only uses constructs valid on both Python 2.7 and Python 3
(file ``write`` calls instead of the ``print >>`` statement).
"""
import sys
from collections import defaultdict


def _ranked(counts):
    """Return the sequences in *counts*, most abundant first."""
    return sorted(counts, key=counts.get, reverse=True)


def _header_weight(header):
    """Return the read count stored after the last "_" of a ">id_count" header.

    *header* must already be stripped of surrounding whitespace.
    Raises ValueError if the trailing field is not an integer.
    """
    return int(header[1:].split("_")[-1])


def _require_weight(weight, path):
    """Raise ValueError when a sequence line precedes any header in *path*."""
    if weight is None:
        raise ValueError(
            "%s: sequence line found before any '>id_count' header" % path)
    return weight


def readfasta_writetabular(fasta, tabular):
    """Collapse the reads of a FASTA file into a ``sequence<TAB>count`` table.

    fasta   -- path of the FASTA file to read.
    tabular -- path of the table to write, most abundant sequence first.

    Header lines (starting with ">") and blank lines are ignored; every other
    line counts as one read of that sequence.
    """
    # A fresh counter per call: a shared module-level dict would carry counts
    # over from one conversion to the next.
    counts = defaultdict(int)
    with open(fasta, "r") as source:
        for line in source:
            # strip() rather than line[:-1]: the last line may have no
            # newline, and files written on Windows end in "\r\n".
            sequence = line.strip()
            if not sequence or sequence.startswith(">"):
                continue
            counts[sequence] += 1
    with open(tabular, "w") as target:
        for sequence in _ranked(counts):
            target.write("%s\t%s\n" % (sequence, counts[sequence]))


def readtabular_writefasta(tabular, fasta):
    """Expand a ``sequence count`` table back into a plain FASTA file.

    tabular -- path of the table to read (whitespace-separated columns).
    fasta   -- path of the FASTA file to write.

    Each sequence is written ``count`` times; headers are a running read
    number starting at 1.  Blank lines are skipped.
    """
    read_id = 0
    with open(tabular, "r") as source, open(fasta, "w") as target:
        for line in source:
            fields = line.split()
            if not fields:
                continue
            sequence, count = fields[0], int(fields[1])
            for _ in range(count):
                read_id += 1
                target.write(">%s\n%s\n" % (read_id, sequence))


def readtabular_writefastaweighted (tabular, fasta):
    """Convert a ``sequence count`` table into a weighted FASTA file.

    tabular -- path of the table to read (whitespace-separated columns).
    fasta   -- path of the weighted FASTA file to write.

    One record is written per table row, with header ``>rank_count`` where
    ``rank`` is the 1-based position of the row.  Blank lines are skipped.
    """
    rank = 0
    with open(tabular, "r") as source, open(fasta, "w") as target:
        for line in source:
            fields = line.split()
            if not fields:
                continue
            rank += 1
            target.write(">%s_%s\n%s\n" % (rank, fields[1], fields[0]))


def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Merge duplicate sequences of a (concatenated) weighted FASTA file.

    fastaweigthed_input    -- path of the weighted FASTA file to read.
    fastaweigthed_reparsed -- path of the re-collapsed weighted FASTA to write.

    Weights of identical sequences are summed and the records are rewritten
    most abundant first with headers ``>rank_total``.  The total number of
    reads seen in the headers is reported on standard output.

    Raises ValueError if a sequence line appears before any header, or if a
    header does not end in an integer count.
    """
    counts = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as source:
        for line in source:
            record = line.strip()
            if not record:
                continue
            if record.startswith(">"):
                weight = _header_weight(record)
                number_reads += weight
            else:
                counts[record] += _require_weight(weight, fastaweigthed_input)
    with open(fastaweigthed_reparsed, "w") as target:
        for rank, sequence in enumerate(_ranked(counts), 1):
            target.write(">%s_%s\n%s\n" % (rank, counts[sequence], sequence))
    sys.stdout.write("%s reads collapsed\n" % number_reads)


def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into a plain FASTA file.

    fastaweigthed -- path of the weighted FASTA file to read.
    fasta         -- path of the FASTA file to write.

    Each sequence is written as many times as the count found in the header
    that precedes it; output headers are a running read number starting at 1.

    Raises ValueError if a sequence line appears before any header, or if a
    header does not end in an integer count.
    """
    read_id = 0
    weight = None
    with open(fastaweigthed, "r") as source, open(fasta, "w") as target:
        for line in source:
            record = line.strip()
            if not record:
                continue
            if record.startswith(">"):
                weight = _header_weight(record)
                continue
            for _ in range(_require_weight(weight, fastaweigthed)):
                read_id += 1
                target.write(">%s\n%s\n" % (read_id, record))


# Conversion keyword (third command-line argument) -> converter function.
_CONVERTERS = {
    "fasta2tabular": readfasta_writetabular,
    "tabular2fasta": readtabular_writefasta,
    "tabular2fastaweight": readtabular_writefastaweighted,
    "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
    "fastaweight2fasta": readfastaeighted_writefasta,
}


def main(argv=None):
    """Run the conversion named on the command line.

    argv -- ``[input_path, output_path, conversion]``; defaults to
            ``sys.argv[1:]``.  Extra trailing arguments are ignored.

    Exits with an error message when arguments are missing or the conversion
    keyword is unknown.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("usage: fasta_tabular_converter.py INPUT OUTPUT {%s}"
                 % "|".join(sorted(_CONVERTERS)))
    source, destination, option = args[0], args[1], args[2]
    converter = _CONVERTERS.get(option)
    if converter is None:
        sys.exit("unknown conversion type: %s" % option)
    converter(source, destination)


# Guarded so that importing the module no longer reads sys.argv.
if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Rest of the changeset that followed the script (apparently the Galaxy tool
# wrapper). The XML markup did not survive extraction; the remaining text is
# kept here word for word for reference.
#
#   diff -r 000000000000 -r 951cb6b3979b fasta_tabular_converter.xml
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/fasta_tabular_converter.xml Sun Jun 21 14:28:49 2015 -0400
#   @@ -0,0 +1,75 @@
#   conversions
#   fasta_tabular_converter.py $input $output $switch.conversionType
#
#   **What it does**
#
#   Converts fasta files to tabular files with sequence on first column and occurence of the sequence in the second column; and reciprocally.
#
#   This format is suitable for storage of sequence datasets in the data library, and will be used in the future.
#
#   Regeneration of the original fasta file from the tabular format is ensured by the same tool
#
#   This tool also handle a fasta "weigthed" format with headers as:
#   >id_numberofreads
#   ATGCATGACCAGATAGGAC
#   etc...
+ +with generation of the "weigthed" format from a tabular format, and recompilation of catenated weighted fasta files + + + + diff -r 000000000000 -r 951cb6b3979b test-data/input.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fa Sun Jun 21 14:28:49 2015 -0400 @@ -0,0 +1,500 @@ +>1 +TAGTTACCTTCATATCTCTCTTTA +>2 +TCTATTCATACAAAACACTAATACCC +>3 +ACAACCTCAACTCATATTT +>4 +TATAATTTTATTTTATATTTTCTCT +>5 +TCTTCTATATAATCCTTTATTATAA +>6 +TAAAATAAACCAAAACCCAAAAAT +>7 +AATCTACAATTTCCATTACGACTCC +>8 +TTTCCGACAAATACACCATCTTC +>9 +CAGATTCACTGATTTTCTTACGCC +>10 +CAAGAATACAAAAAATACTAATTA +>11 +AACTCTCTAATTTAACTTTGTGC +>12 +AAAAACACACAGACACAAGCAGCAAT +>13 +TTACTAGATCCACCCTCATTA +>14 +ATGTTATTTACTTTTTCCCCTTATA +>15 +CGATATTTTCTCCTCTTACC +>16 +TACAGAGAAATATACAACACTCACC +>17 +ATGAAATTCGAACAATACGTC +>18 +GAGAATAAATATTTCAATGGTCTATTG +>19 +TGCTTTTACTACATATTTTTTATTTTTTTA +>20 +CATACCTTAAATTATCTCTTTCTT +>21 +TTTGTTTTTCATTTTTTTATCTTT +>22 +TTATTATCTATTTTAATTTTTCTTAA +>23 +TATTATCATCTCGTTCTTCCTTCTC +>24 +TCAACTGGCAAGAATTTTTGAAAATT +>25 +TACAAATTTTTTTTTCTTTCTTAT +>26 +TTTTCTTTTTTTCATTTTCTCTTTTA +>27 +AAGATGGAGTAGTTTTTT +>28 +TCTCATCTTACAATTTTTTAAAACTT +>29 +CATACCTACAAAAAAGCTTCTCTTAC +>30 +TTTTATATTTCCTTATATCTTTACTA +>31 +GGAGATTGTAGAACGAAAGGAAAAT +>32 +TCTATTTCTTTATTTTTTTTATTAT +>33 +CGGACGGTATATTTTTTAATATAA +>34 +TTCTTGGACTACACATTTTTTATTGTTTTA +>35 +TACTATATACTTCTTCAAATCACA +>36 +ATTTTACTTCATCATTTTC +>37 +TATTTCCAACCTTCAACCTCAAATAA +>38 +CACGACTTTATTCTTTTTATCTCA +>39 +TTCTTTTACTACATATTTTTTATTTTTTTA +>40 +TTTATTACAACCCTATCTTACCTCAA +>41 +CGATATTTTCTCCTCGTACC +>42 +TAATTACCATTGCTAACTATCCA +>43 +CATTAATTCATCCATTTAAACTAA +>44 +GAAACAAACAACACATACCCTCTGGC +>45 +TACTTTTTTCTTAATTTTTTATTAAAC +>46 +TAACTTTAACTTTTTTACT +>47 +TTCTTTTACTACATATTGTTTATTTTTTTA +>48 +TCTAGTCTGAGCGTAGTACCAGATTG +>49 +TTTTTTATCAATTTTCACCATTCAT +>50 +AATGACACACTCTTCATCAAC +>51 +TAACATAAATTTTAATCATAAATTG +>52 +TCTATATTATTTTTATCAATTTTCACC +>53 +TCCCAACCCTCGAGCATCATTTTC +>54 +TAGTCATACATACCTAATTATACATA +>55 
+TACAAAAAATGCGAAAATTGACCCT +>56 +GAGAACTTTTAATCATTTTAC +>57 +TCTTATTTTAATCTTCCAATTTC +>58 +CGGCACATGTTGAATTACACTCA +>59 +CAGTTTCACAAAAGATCTTTTAA +>60 +GCCAACGACCATACCACGA +>61 +CAAATAACAAACTGAATAAACGAAA +>62 +TAGTTTCACTACTTTATTCTTTTTA +>63 +TGAGGAAAACAGAAAAATGAGAGACA +>64 +TATATAAATCTTCAACATCAA +>65 +TGATTTACTTACATTCTTTTTTT +>66 +CTTCTTTTACTACATATTTTTTATTTTTTTA +>67 +TACTGAAAACGGGCGCATATCAGTGG +>68 +TATTCAATCACTCCATTATATATAACA +>69 +TATATTGCCTCCCCATAATCCTT +>70 +TCGCCGTAAAGCCAGTCGTTCTCC +>71 +TTTAAACACTTCCTACATCAAATTTC +>72 +TTTGTTTTTTACTATATTT +>73 +TCTTTTTTTTAATACTTATTTTCATT +>74 +TAAGTTTTTAATCATTTTTTTT +>75 +TGAGAATGACTTCTTCACGATCTCTT +>76 +AATTTATTTAATTTATATTCTAACTAA +>77 +AAAAAATATCTTTTTTAACTCGTGGCC +>78 +AAGACAACAATGACATATAAGACG +>79 +TGAGCTAGAACTGCACCCACTCCA +>80 +ACTAAACTTTTCTTACCATATTTCTA +>81 +TATTTCACTTTATACTTCCTTAA +>82 +TATCTATCTTTGATCTTCTTTTCA +>83 +TTTTTATCAATTTTCACCATTC +>84 +TAAAAATAATTGTCTTTAATTTCA +>85 +TTAAAGACGCAACAACTAACATT +>86 +TAACCTTGCAGAACTATACGATTCAAA +>87 +TACTATTTTATTATACATACATACATTA +>88 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +>89 +AATGTCACTTGAAGAATTCACGT +>90 +TAATAAGAAACTGTTCAAACAATCCAC +>91 +AAGCACGCCTTACCACAATTTATAA +>92 +CCATATATGACTGACTCATTTCAC +>93 +TTAATATGTAATTTCATACCTCAC +>94 +AGGATTTTTAAGCCCATATGTTTCC +>95 +ACAGCAGGACGGTGATCA +>96 +TGATGACGGGCAGCAGGGATTTTC +>97 +TTGTAAAACATTCTTTCTCCTGAC +>98 +TTCTTCCTTTTATCCTCTCTTAA +>99 +ATTATTAATAAATTATTATAA +>100 +CTGGAAACTATTGATCAAATT +>101 +TACAACTAACATCCTTTCTTCTTCC +>102 +TCAAATGCAAATTGGATTTATGA +>103 +TCCTGAGGACGAGGGGCGTTTAGC +>104 +TACACAGACTTACAAAACACATCCTTC +>105 +GAAAGGAAGGGAAGAAAGCGAAAGGA +>106 +TTAAACAATTTGGAATTAATT +>107 +TAGCCTTTACTAGGCTTTTTCTAA +>108 +CGATATTTTCTCCTCTTACCT +>109 +TGGAATGTAAAGAAGTATGGAG +>110 +TTTTTAACTCCCATCATTTTTCCTC +>111 +TTTTTTATCATTTTTCACCTAAAAAA +>112 +GAACAATTTTTCAATTTTTTACATTA +>113 +TATGGATTATTTCAAAATTTTTTTTT +>114 +TAACGGAGCACGAGAACGAAGTGG +>115 +CAATTTTTAATTCCTTTTTTCTTCTT +>116 +AGATGTTGATCTAAACTCTCCCA +>117 +TGAGCGGAGAACCAGAGTTGATGAGC +>118 
+TAACAAATAGAACGTTCTAATTTAAA +>119 +CTAGACAAGATGCTATAAATTTTAAA +>120 +TTTTCTTTACCCATCTTTACTTTCCC +>121 +TACACACTCATCAACCAAAGGACG +>122 +TCTTATTATCATTTTTTTATCCCTT +>123 +TCAGAGTTCTACAGGTCCTACGATT +>124 +TTTATTACTTAGTCATAATTCCAA +>125 +GCCGGGGCGTGAGATGTCTGCATTA +>126 +GACGAAACGCAACAACAAAATGGACG +>127 +TAGACTTTCTACTCATTATTAC +>128 +AAATTGCAAAGATGGAAAATAAAACT +>129 +CCTTACTCAACATACTTAATCATACTTA +>130 +TGCTTGGACTACATATGGTTGAGTG +>131 +GAATGATCGCACCACCACCTCAACGTT +>132 +TCCACCTATTTATCTTTTCTT +>133 +TGGACAAGAACCACGCGACGGGTGT +>134 +CAAGATATGAACAAAGCAAAGACAC +>135 +CAAACGGAACAAGACATCACCATC +>136 +NATTCTTACTCCATTTCAATTTACT +>137 +TAGAACTCGAACCAGAGCTCC +>138 +CGGAAAAGAATGTAGACCATTTAA +>139 +TACAAACGGAACTTTCGTCATAA +>140 +GGTATCTTTATATTTTAATTTTCTT +>141 +TATTCCGACAATACCTTCTTTAC +>142 +AACTTTAAATTTTTAATAACCTT +>143 +CATAATATAAACTTATCTT +>144 +TATTTATAAATTTTTTCTTGAGAC +>145 +TTTTTTTTTTGTTTTTATTTTTATCAT +>146 +TATTATACATAGAATAACAAATCTTT +>147 +TGGAGTAGCACAGTCGTCTGAAATC +>148 +TATTTCTTTTTTAACTTCTTTTC +>149 +TTATAATCACGGCACCCTATACA +>150 +TTCTTTTACTACATATTTTTTATTTTT +>151 +TAGCGAGATGGACCAACGTGCTGT +>152 +CCAGAAAACAATACAACATCCTCA +>153 +TCCGAAAACAAGGCCCGTCGCT +>154 +TACTCAATAGAACTCTACTCACTCATA +>155 +AACGGGGAATAAGGGTTCG +>156 +TCAGTCTTTTTTTCTCTCCTA +>157 +AATATAAAAATACAATCAACCATTGCA +>158 +GGACGGAGAACTGATAAGGGCA +>159 +TAAAGAAGAAGAATTGATTTTAAT +>160 +TCATTACACTTCTTACAAAAC +>161 +CCGCGATCTGCTTATTTATAATCTT +>162 +TCTAAACACCCACGAAAATCTCTTAC +>163 +AACAGGAAAAACAGAAGGATTTCTA +>164 +TCTCTTTTATTTTTATCTTTCCTT +>165 +AACATTTTATCAATTATACATTA +>166 +GCAGATAGAAATCAATACAAAAATC +>167 +TTAATGACACACGGGAAAAACACCG +>168 +TACAGACAACACATACGGACTTAAGT +>169 +TCCACAACAACTCTATCTAAAGCATT +>170 +ATAAAGCTAGATTACCAAAGCAT +>171 +TACCTCTTTATTAACCTCCACCTCTA +>172 +TACACCTCTTTTTACTTTTTTATT +>173 +CACCGAACCGGGAAGGCGAACAAC +>174 +TAGGTACTTACCTTTTTTTTACACAA +>175 +AGGAATATGATGAAATAAAAAAAT +>176 +TATTATTTTTTTATTCCATTCATAT +>177 +TAAAATAAATAAGTCCGACGACAA +>178 +TCTGTATTTGACTTATTACTTTCTCC +>179 +AAGGAATTAAAGCAATAATTCTAA 
+>180 +TTCATTTTATTTTTAAATATCTTTTTT +>181 +TTAGTATTAATCTTCACTTAA +>182 +TATGGAGAAACAGCGATATAAGTCA +>183 +CCCGAAAAGCCGAGGACGACTTA +>184 +CACAGACTGAGGCAGAAAAAACAA +>185 +TCAAGCCTTTTGAAGAACTGACCTAAA +>186 +TAAGAAACTGAGCTAACGCAATGTACC +>187 +CGATATTTTCTCCTCTGACC +>188 +TAACTACTTTTACATTAATACTAA +>189 +ACCAGCACCTTCCGACTCAACGTCAAA +>190 +TCAAAGAACAATGTAAAGCCGCGAC +>191 +TTGATTCTTCTTTTTCACAAAA +>192 +TACAAAACAAACAAATTACAATCTAAA +>193 +GAAACCATTATCTTATCTTTATACA +>194 +CTATATTTTCTCTCTTACC +>195 +TCAGAAGAACAGAGAATTGATTTT +>196 +TAATAAATTATTAAATAAAAAAAAAA +>197 +CCGACCGAGCAAATAAACACAGGAACG +>198 +AAGTGAAGAAGTAGTTTTT +>199 +TGGACGGAGAACTGATAAGGGC +>200 +TGCTTGGACTACATATGGTTGAGGG +>201 +TACAAGACTAAAACAAACGTGAAGT +>202 +TGAAACTGAAACTAACATACAAAATATT +>203 +TATCTGATCAACAATCTTTTCCCAT +>204 +TTTAAGACTTATGAGCTTG +>205 +CAAGGCTCAGAAGAACATCACCAAGACC +>206 +TTCAAGTAGATTGCATTTTTTAATA +>207 +CGCAACCAGCAGCAACTCCTAGCAT +>208 +TACAAACGGAACTTTCTTCATAACTTC +>209 +ACAAATCATAAATTTTTTTTTACT +>210 +TCCGAAAAATCGTAGGACCCGGGCA +>211 +CGCCGCAAGATGAATACTCTAATGA +>212 +TGACCAAAGACAAACAAACAATAAATA +>213 +AAAAGGAAAAACAGAAAAATTGGG +>214 +TTCTTGATAACGCATCTTCTACAT +>215 +TACTGAAACAAGGAAACACAAGC +>216 +TCAAAAAGTAATAGGGATCGTTA +>217 +TGGAATGTAAAGAAGTATGGAG +>218 +TAAAATTGTAATATTTAAATAATAT +>219 +GAGGATTAAAAGAACGGTTTATAA +>220 +TTCTTTGACTACATATTTTTTATT +>221 +TACAAACGTAATTTTCGCATAACATC +>222 +AGAACAATTAAATAAAATAGCATA +>223 +TAATAATTTAAATAAATATAAATTT +>224 +AATTGCAACAGAGACTGGAA +>225 +TTAAGTTTTAGACATAATCTATTACAA +>226 +TGAAAGGAAAAACAGGACACGGGA +>227 +AACAGGGAGATCAACAGCGTTGACA +>228 +GGACGGAGAACTGATAATGGC +>229 +TGTGTAATCTTTCTACTTCTTCTAC +>230 +TTTACCAGAGGAGTCGAGTTTTT +>231 +AAAATCGACTGCCGAAAACATTTTAA +>232 +TGCTTGGACTACATATGGTTGAGTG +>233 +TACAGACAACACATACGGACTTAA +>234 +GAAGAGGAGGAGGAGTTTGTAAG +>235 +TTACAATCTACTATTCTTTTATTA +>236 +TTATTACATCGTCCACATATAACAAAA +>237 +TTTTTAACTCATTTTACAATTAAAC +>238 +AAAACCCGGACAAACCATCGGAGGA +>239 +CAACACATGACGCGACAATTCTTG +>240 +TACACACTCATCAACCAAAGTACGTA +>241 
+TTTTCATTTCTTCTTCAAATCCTTT +>242 +AACAACTGCAAACATCTACCACA +>243 +TAGTTTTAAATATTTCTTTTTTTC +>244 +AAAGACAAAAGAAATACAGGCACT +>245 +TTTGATACCTTTATACCATACCTATT +>246 +ACAGCAGGACGGTGATCA +>247 +TTCTAGCACAACACGCACACATATA +>248 +TAATCAAAAAACTCTTCATTTTTA +>249 +TAGGAACTTCATACCGGTCTC +>250 +TATTTCAGCAACAGACTAAGACTAA diff -r 000000000000 -r 951cb6b3979b test-data/output.faw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.faw Sun Jun 21 14:28:49 2015 -0400 @@ -0,0 +1,494 @@ +>1_2 +ACAGCAGGACGGTGATCA +>2_2 +TGCTTGGACTACATATGGTTGAGTG +>3_2 +TGGAATGTAAAGAAGTATGGAG +>4_1 +AATTTATTTAATTTATATTCTAACTAA +>5_1 +TTATAATCACGGCACCCTATACA +>6_1 +TCCGAAAAATCGTAGGACCCGGGCA +>7_1 +TCCCAACCCTCGAGCATCATTTTC +>8_1 +TTGATTCTTCTTTTTCACAAAA +>9_1 +AGAACAATTAAATAAAATAGCATA +>10_1 +CCAGAAAACAATACAACATCCTCA +>11_1 +TCTAGTCTGAGCGTAGTACCAGATTG +>12_1 +TTTTTAACTCCCATCATTTTTCCTC +>13_1 +GCAGATAGAAATCAATACAAAAATC +>14_1 +AAGTGAAGAAGTAGTTTTT +>15_1 +AATGTCACTTGAAGAATTCACGT +>16_1 +TTTACCAGAGGAGTCGAGTTTTT +>17_1 +GGTATCTTTATATTTTAATTTTCTT +>18_1 +TTACTAGATCCACCCTCATTA +>19_1 +TATATAAATCTTCAACATCAA +>20_1 +GAAACCATTATCTTATCTTTATACA +>21_1 +GGAGATTGTAGAACGAAAGGAAAAT +>22_1 +ACTAAACTTTTCTTACCATATTTCTA +>23_1 +TCAAGCCTTTTGAAGAACTGACCTAAA +>24_1 +TAACATAAATTTTAATCATAAATTG +>25_1 +TAGCGAGATGGACCAACGTGCTGT +>26_1 +TACAAACGTAATTTTCGCATAACATC +>27_1 +CGGAAAAGAATGTAGACCATTTAA +>28_1 +GAAAGGAAGGGAAGAAAGCGAAAGGA +>29_1 +CAAGAATACAAAAAATACTAATTA +>30_1 +CATACCTACAAAAAAGCTTCTCTTAC +>31_1 +TAAAATAAATAAGTCCGACGACAA +>32_1 +AGGAATATGATGAAATAAAAAAAT +>33_1 +TTTTTTTTTTGTTTTTATTTTTATCAT +>34_1 +TTTTTAACTCATTTTACAATTAAAC +>35_1 +CCGCGATCTGCTTATTTATAATCTT +>36_1 +TAGGTACTTACCTTTTTTTTACACAA +>37_1 +TCATTACACTTCTTACAAAAC +>38_1 +TATTATACATAGAATAACAAATCTTT +>39_1 +ATGTTATTTACTTTTTCCCCTTATA +>40_1 +TCCGAAAACAAGGCCCGTCGCT +>41_1 +TGCTTTTACTACATATTTTTTATTTTTTTA +>42_1 +GAACAATTTTTCAATTTTTTACATTA +>43_1 +AACATTTTATCAATTATACATTA +>44_1 +TATCTGATCAACAATCTTTTCCCAT +>45_1 +AATTGCAACAGAGACTGGAA +>46_1 
+TATTCAATCACTCCATTATATATAACA +>47_1 +CAAACGGAACAAGACATCACCATC +>48_1 +TCTTCTATATAATCCTTTATTATAA +>49_1 +TGATGACGGGCAGCAGGGATTTTC +>50_1 +TTTATTACAACCCTATCTTACCTCAA +>51_1 +AACAGGAAAAACAGAAGGATTTCTA +>52_1 +CGATATTTTCTCCTCGTACC +>53_1 +AAATTGCAAAGATGGAAAATAAAACT +>54_1 +TACACACTCATCAACCAAAGGACG +>55_1 +TATATTGCCTCCCCATAATCCTT +>56_1 +TACAAACGGAACTTTCTTCATAACTTC +>57_1 +GGACGGAGAACTGATAATGGC +>58_1 +TTCTTTGACTACATATTTTTTATT +>59_1 +TCTTTTTTTTAATACTTATTTTCATT +>60_1 +TACTTTTTTCTTAATTTTTTATTAAAC +>61_1 +AATATAAAAATACAATCAACCATTGCA +>62_1 +CAGTTTCACAAAAGATCTTTTAA +>63_1 +GAAACAAACAACACATACCCTCTGGC +>64_1 +AATGACACACTCTTCATCAAC +>65_1 +AAGATGGAGTAGTTTTTT +>66_1 +ACAACCTCAACTCATATTT +>67_1 +TTAAACAATTTGGAATTAATT +>68_1 +TACAAAAAATGCGAAAATTGACCCT +>69_1 +GGACGGAGAACTGATAAGGGCA +>70_1 +TTAAGTTTTAGACATAATCTATTACAA +>71_1 +TTATTATCTATTTTAATTTTTCTTAA +>72_1 +CACCGAACCGGGAAGGCGAACAAC +>73_1 +TCCACCTATTTATCTTTTCTT +>74_1 +TCGCCGTAAAGCCAGTCGTTCTCC +>75_1 +TCAAAGAACAATGTAAAGCCGCGAC +>76_1 +TGGACAAGAACCACGCGACGGGTGT +>77_1 +CCCGAAAAGCCGAGGACGACTTA +>78_1 +TTTTTTATCATTTTTCACCTAAAAAA +>79_1 +TAGAACTCGAACCAGAGCTCC +>80_1 +TCTATATTATTTTTATCAATTTTCACC +>81_1 +TCTATTTCTTTATTTTTTTTATTAT +>82_1 +TTTGATACCTTTATACCATACCTATT +>83_1 +ATAAAGCTAGATTACCAAAGCAT +>84_1 +GCCAACGACCATACCACGA +>85_1 +CGGCACATGTTGAATTACACTCA +>86_1 +TACTATTTTATTATACATACATACATTA +>87_1 +TTAATGACACACGGGAAAAACACCG +>88_1 +TAGTTTCACTACTTTATTCTTTTTA +>89_1 +AACAGGGAGATCAACAGCGTTGACA +>90_1 +CGATATTTTCTCCTCTGACC +>91_1 +ACCAGCACCTTCCGACTCAACGTCAAA +>92_1 +AAGGAATTAAAGCAATAATTCTAA +>93_1 +TACAAAACAAACAAATTACAATCTAAA +>94_1 +CAATTTTTAATTCCTTTTTTCTTCTT +>95_1 +TACAGACAACACATACGGACTTAA +>96_1 +TCTGTATTTGACTTATTACTTTCTCC +>97_1 +TGAGCTAGAACTGCACCCACTCCA +>98_1 +CGCCGCAAGATGAATACTCTAATGA +>99_1 +TATTTCTTTTTTAACTTCTTTTC +>100_1 +TTACAATCTACTATTCTTTTATTA +>101_1 +TTTAAACACTTCCTACATCAAATTTC +>102_1 +TGTGTAATCTTTCTACTTCTTCTAC +>103_1 +TCTATTCATACAAAACACTAATACCC +>104_1 +TGGAGTAGCACAGTCGTCTGAAATC +>105_1 +AAGCACGCCTTACCACAATTTATAA 
+>106_1 +CTGGAAACTATTGATCAAATT +>107_1 +TACACAGACTTACAAAACACATCCTTC +>108_1 +TTCAAGTAGATTGCATTTTTTAATA +>109_1 +TTATTACATCGTCCACATATAACAAAA +>110_1 +CAAGGCTCAGAAGAACATCACCAAGACC +>111_1 +TGAGGAAAACAGAAAAATGAGAGACA +>112_1 +TCAAAAAGTAATAGGGATCGTTA +>113_1 +TAACTTTAACTTTTTTACT +>114_1 +TATTCCGACAATACCTTCTTTAC +>115_1 +TTTGTTTTTTACTATATTT +>116_1 +TTCATTTTATTTTTAAATATCTTTTTT +>117_1 +TACTCAATAGAACTCTACTCACTCATA +>118_1 +TGAAAGGAAAAACAGGACACGGGA +>119_1 +AAAATCGACTGCCGAAAACATTTTAA +>120_1 +TACAGAGAAATATACAACACTCACC +>121_1 +TCAACTGGCAAGAATTTTTGAAAATT +>122_1 +GAGAACTTTTAATCATTTTAC +>123_1 +TATTATCATCTCGTTCTTCCTTCTC +>124_1 +TTTTCATTTCTTCTTCAAATCCTTT +>125_1 +TAGTCATACATACCTAATTATACATA +>126_1 +ATTTTACTTCATCATTTTC +>127_1 +TCTCTTTTATTTTTATCTTTCCTT +>128_1 +GCCGGGGCGTGAGATGTCTGCATTA +>129_1 +AGGATTTTTAAGCCCATATGTTTCC +>130_1 +CAAGATATGAACAAAGCAAAGACAC +>131_1 +CAACACATGACGCGACAATTCTTG +>132_1 +CAAATAACAAACTGAATAAACGAAA +>133_1 +TGAGAATGACTTCTTCACGATCTCTT +>134_1 +TCTTATTATCATTTTTTTATCCCTT +>135_1 +TCAAATGCAAATTGGATTTATGA +>136_1 +CCTTACTCAACATACTTAATCATACTTA +>137_1 +TAGACTTTCTACTCATTATTAC +>138_1 +TGAAACTGAAACTAACATACAAAATATT +>139_1 +AAAACCCGGACAAACCATCGGAGGA +>140_1 +TACAGACAACACATACGGACTTAAGT +>141_1 +TATTTCAGCAACAGACTAAGACTAA +>142_1 +AACTTTAAATTTTTAATAACCTT +>143_1 +TATTTATAAATTTTTTCTTGAGAC +>144_1 +TTAATATGTAATTTCATACCTCAC +>145_1 +CACAGACTGAGGCAGAAAAAACAA +>146_1 +TAAAGAAGAAGAATTGATTTTAAT +>147_1 +TACTGAAAACGGGCGCATATCAGTGG +>148_1 +TCAGTCTTTTTTTCTCTCCTA +>149_1 +TATAATTTTATTTTATATTTTCTCT +>150_1 +NATTCTTACTCCATTTCAATTTACT +>151_1 +TTGTAAAACATTCTTTCTCCTGAC +>152_1 +TAATTACCATTGCTAACTATCCA +>153_1 +TTCTTCCTTTTATCCTCTCTTAA +>154_1 +TCTAAACACCCACGAAAATCTCTTAC +>155_1 +AAAAACACACAGACACAAGCAGCAAT +>156_1 +CGGACGGTATATTTTTTAATATAA +>157_1 +TATGGAGAAACAGCGATATAAGTCA +>158_1 +TACAACTAACATCCTTTCTTCTTCC +>159_1 +AACTCTCTAATTTAACTTTGTGC +>160_1 +TCCTGAGGACGAGGGGCGTTTAGC +>161_1 +TATTTCCAACCTTCAACCTCAAATAA +>162_1 +TGGACGGAGAACTGATAAGGGC +>163_1 +TTTAAGACTTATGAGCTTG +>164_1 
+TTAAAGACGCAACAACTAACATT +>165_1 +TAGGAACTTCATACCGGTCTC +>166_1 +CGATATTTTCTCCTCTTACC +>167_1 +GAGGATTAAAAGAACGGTTTATAA +>168_1 +GAATGATCGCACCACCACCTCAACGTT +>169_1 +TTTTCTTTACCCATCTTTACTTTCCC +>170_1 +AAGACAACAATGACATATAAGACG +>171_1 +TAATAATTTAAATAAATATAAATTT +>172_1 +TACTGAAACAAGGAAACACAAGC +>173_1 +TCAGAAGAACAGAGAATTGATTTT +>174_1 +CATACCTTAAATTATCTCTTTCTT +>175_1 +TTCTTTTACTACATATTTTTTATTTTT +>176_1 +AAAAAATATCTTTTTTAACTCGTGGCC +>177_1 +TAACAAATAGAACGTTCTAATTTAAA +>178_1 +TAGTTACCTTCATATCTCTCTTTA +>179_1 +TAAAATTGTAATATTTAAATAATAT +>180_1 +AAAAGGAAAAACAGAAAAATTGGG +>181_1 +AGATGTTGATCTAAACTCTCCCA +>182_1 +TACCTCTTTATTAACCTCCACCTCTA +>183_1 +TTTCCGACAAATACACCATCTTC +>184_1 +ACAAATCATAAATTTTTTTTTACT +>185_1 +GACGAAACGCAACAACAAAATGGACG +>186_1 +TACAAATTTTTTTTTCTTTCTTAT +>187_1 +TACACCTCTTTTTACTTTTTTATT +>188_1 +TATGGATTATTTCAAAATTTTTTTTT +>189_1 +TTCTAGCACAACACGCACACATATA +>190_1 +TAACTACTTTTACATTAATACTAA +>191_1 +TCTCATCTTACAATTTTTTAAAACTT +>192_1 +TTCTTGGACTACACATTTTTTATTGTTTTA +>193_1 +TACACACTCATCAACCAAAGTACGTA +>194_1 +TACTATATACTTCTTCAAATCACA +>195_1 +TCAGAGTTCTACAGGTCCTACGATT +>196_1 +TGATTTACTTACATTCTTTTTTT +>197_1 +CCATATATGACTGACTCATTTCAC +>198_1 +GAAGAGGAGGAGGAGTTTGTAAG +>199_1 +AAAGACAAAAGAAATACAGGCACT +>200_1 +TACAAGACTAAAACAAACGTGAAGT +>201_1 +TAACGGAGCACGAGAACGAAGTGG +>202_1 +CTTCTTTTACTACATATTTTTTATTTTTTTA +>203_1 +TAATAAGAAACTGTTCAAACAATCCAC +>204_1 +TGAGCGGAGAACCAGAGTTGATGAGC +>205_1 +TATTATTTTTTTATTCCATTCATAT +>206_1 +TTTATTACTTAGTCATAATTCCAA +>207_1 +TTTTATATTTCCTTATATCTTTACTA +>208_1 +AACGGGGAATAAGGGTTCG +>209_1 +AATCTACAATTTCCATTACGACTCC +>210_1 +CCGACCGAGCAAATAAACACAGGAACG +>211_1 +TCCACAACAACTCTATCTAAAGCATT +>212_1 +TTCTTGATAACGCATCTTCTACAT +>213_1 +TGCTTGGACTACATATGGTTGAGGG +>214_1 +CAGATTCACTGATTTTCTTACGCC +>215_1 +TTTGTTTTTCATTTTTTTATCTTT +>216_1 +CTATATTTTCTCTCTTACC +>217_1 +TAACCTTGCAGAACTATACGATTCAAA +>218_1 +TAAGAAACTGAGCTAACGCAATGTACC +>219_1 +TTCTTTTACTACATATTTTTTATTTTTTTA +>220_1 +TATCTATCTTTGATCTTCTTTTCA +>221_1 
+TAATAAATTATTAAATAAAAAAAAAA +>222_1 +TTTTTTATCAATTTTCACCATTCAT +>223_1 +TATTTCACTTTATACTTCCTTAA +>224_1 +TAGTTTTAAATATTTCTTTTTTTC +>225_1 +TTCTTTTACTACATATTGTTTATTTTTTTA +>226_1 +GAGAATAAATATTTCAATGGTCTATTG +>227_1 +CGATATTTTCTCCTCTTACCT +>228_1 +CACGACTTTATTCTTTTTATCTCA +>229_1 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA +>230_1 +CATAATATAAACTTATCTT +>231_1 +ATGAAATTCGAACAATACGTC +>232_1 +AACAACTGCAAACATCTACCACA +>233_1 +TAAAAATAATTGTCTTTAATTTCA +>234_1 +CGCAACCAGCAGCAACTCCTAGCAT +>235_1 +ATTATTAATAAATTATTATAA +>236_1 +CATTAATTCATCCATTTAAACTAA +>237_1 +TCTTATTTTAATCTTCCAATTTC +>238_1 +CTAGACAAGATGCTATAAATTTTAAA +>239_1 +TGACCAAAGACAAACAAACAATAAATA +>240_1 +TTTTTATCAATTTTCACCATTC +>241_1 +TAAGTTTTTAATCATTTTTTTT +>242_1 +TAATCAAAAAACTCTTCATTTTTA +>243_1 +TACAAACGGAACTTTCGTCATAA +>244_1 +TTTTCTTTTTTTCATTTTCTCTTTTA +>245_1 +TAGCCTTTACTAGGCTTTTTCTAA +>246_1 +TTAGTATTAATCTTCACTTAA +>247_1 +TAAAATAAACCAAAACCCAAAAAT diff -r 000000000000 -r 951cb6b3979b test-data/output.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tab Sun Jun 21 14:28:49 2015 -0400 @@ -0,0 +1,247 @@ +ACAGCAGGACGGTGATCA 2 +TGCTTGGACTACATATGGTTGAGTG 2 +TGGAATGTAAAGAAGTATGGAG 2 +AATTTATTTAATTTATATTCTAACTAA 1 +TTATAATCACGGCACCCTATACA 1 +TCCGAAAAATCGTAGGACCCGGGCA 1 +TCCCAACCCTCGAGCATCATTTTC 1 +TTGATTCTTCTTTTTCACAAAA 1 +AGAACAATTAAATAAAATAGCATA 1 +CCAGAAAACAATACAACATCCTCA 1 +TCTAGTCTGAGCGTAGTACCAGATTG 1 +TTTTTAACTCCCATCATTTTTCCTC 1 +GCAGATAGAAATCAATACAAAAATC 1 +AAGTGAAGAAGTAGTTTTT 1 +AATGTCACTTGAAGAATTCACGT 1 +TTTACCAGAGGAGTCGAGTTTTT 1 +GGTATCTTTATATTTTAATTTTCTT 1 +TTACTAGATCCACCCTCATTA 1 +TATATAAATCTTCAACATCAA 1 +GAAACCATTATCTTATCTTTATACA 1 +GGAGATTGTAGAACGAAAGGAAAAT 1 +ACTAAACTTTTCTTACCATATTTCTA 1 +TCAAGCCTTTTGAAGAACTGACCTAAA 1 +TAACATAAATTTTAATCATAAATTG 1 +TAGCGAGATGGACCAACGTGCTGT 1 +TACAAACGTAATTTTCGCATAACATC 1 +CGGAAAAGAATGTAGACCATTTAA 1 +GAAAGGAAGGGAAGAAAGCGAAAGGA 1 +CAAGAATACAAAAAATACTAATTA 1 +CATACCTACAAAAAAGCTTCTCTTAC 1 +TAAAATAAATAAGTCCGACGACAA 1 +AGGAATATGATGAAATAAAAAAAT 1 
+TTTTTTTTTTGTTTTTATTTTTATCAT 1 +TTTTTAACTCATTTTACAATTAAAC 1 +CCGCGATCTGCTTATTTATAATCTT 1 +TAGGTACTTACCTTTTTTTTACACAA 1 +TCATTACACTTCTTACAAAAC 1 +TATTATACATAGAATAACAAATCTTT 1 +ATGTTATTTACTTTTTCCCCTTATA 1 +TCCGAAAACAAGGCCCGTCGCT 1 +TGCTTTTACTACATATTTTTTATTTTTTTA 1 +GAACAATTTTTCAATTTTTTACATTA 1 +AACATTTTATCAATTATACATTA 1 +TATCTGATCAACAATCTTTTCCCAT 1 +AATTGCAACAGAGACTGGAA 1 +TATTCAATCACTCCATTATATATAACA 1 +CAAACGGAACAAGACATCACCATC 1 +TCTTCTATATAATCCTTTATTATAA 1 +TGATGACGGGCAGCAGGGATTTTC 1 +TTTATTACAACCCTATCTTACCTCAA 1 +AACAGGAAAAACAGAAGGATTTCTA 1 +CGATATTTTCTCCTCGTACC 1 +AAATTGCAAAGATGGAAAATAAAACT 1 +TACACACTCATCAACCAAAGGACG 1 +TATATTGCCTCCCCATAATCCTT 1 +TACAAACGGAACTTTCTTCATAACTTC 1 +GGACGGAGAACTGATAATGGC 1 +TTCTTTGACTACATATTTTTTATT 1 +TCTTTTTTTTAATACTTATTTTCATT 1 +TACTTTTTTCTTAATTTTTTATTAAAC 1 +AATATAAAAATACAATCAACCATTGCA 1 +CAGTTTCACAAAAGATCTTTTAA 1 +GAAACAAACAACACATACCCTCTGGC 1 +AATGACACACTCTTCATCAAC 1 +AAGATGGAGTAGTTTTTT 1 +ACAACCTCAACTCATATTT 1 +TTAAACAATTTGGAATTAATT 1 +TACAAAAAATGCGAAAATTGACCCT 1 +GGACGGAGAACTGATAAGGGCA 1 +TTAAGTTTTAGACATAATCTATTACAA 1 +TTATTATCTATTTTAATTTTTCTTAA 1 +CACCGAACCGGGAAGGCGAACAAC 1 +TCCACCTATTTATCTTTTCTT 1 +TCGCCGTAAAGCCAGTCGTTCTCC 1 +TCAAAGAACAATGTAAAGCCGCGAC 1 +TGGACAAGAACCACGCGACGGGTGT 1 +CCCGAAAAGCCGAGGACGACTTA 1 +TTTTTTATCATTTTTCACCTAAAAAA 1 +TAGAACTCGAACCAGAGCTCC 1 +TCTATATTATTTTTATCAATTTTCACC 1 +TCTATTTCTTTATTTTTTTTATTAT 1 +TTTGATACCTTTATACCATACCTATT 1 +ATAAAGCTAGATTACCAAAGCAT 1 +GCCAACGACCATACCACGA 1 +CGGCACATGTTGAATTACACTCA 1 +TACTATTTTATTATACATACATACATTA 1 +TTAATGACACACGGGAAAAACACCG 1 +TAGTTTCACTACTTTATTCTTTTTA 1 +AACAGGGAGATCAACAGCGTTGACA 1 +CGATATTTTCTCCTCTGACC 1 +ACCAGCACCTTCCGACTCAACGTCAAA 1 +AAGGAATTAAAGCAATAATTCTAA 1 +TACAAAACAAACAAATTACAATCTAAA 1 +CAATTTTTAATTCCTTTTTTCTTCTT 1 +TACAGACAACACATACGGACTTAA 1 +TCTGTATTTGACTTATTACTTTCTCC 1 +TGAGCTAGAACTGCACCCACTCCA 1 +CGCCGCAAGATGAATACTCTAATGA 1 +TATTTCTTTTTTAACTTCTTTTC 1 +TTACAATCTACTATTCTTTTATTA 1 +TTTAAACACTTCCTACATCAAATTTC 1 +TGTGTAATCTTTCTACTTCTTCTAC 1 
+TCTATTCATACAAAACACTAATACCC 1 +TGGAGTAGCACAGTCGTCTGAAATC 1 +AAGCACGCCTTACCACAATTTATAA 1 +CTGGAAACTATTGATCAAATT 1 +TACACAGACTTACAAAACACATCCTTC 1 +TTCAAGTAGATTGCATTTTTTAATA 1 +TTATTACATCGTCCACATATAACAAAA 1 +CAAGGCTCAGAAGAACATCACCAAGACC 1 +TGAGGAAAACAGAAAAATGAGAGACA 1 +TCAAAAAGTAATAGGGATCGTTA 1 +TAACTTTAACTTTTTTACT 1 +TATTCCGACAATACCTTCTTTAC 1 +TTTGTTTTTTACTATATTT 1 +TTCATTTTATTTTTAAATATCTTTTTT 1 +TACTCAATAGAACTCTACTCACTCATA 1 +TGAAAGGAAAAACAGGACACGGGA 1 +AAAATCGACTGCCGAAAACATTTTAA 1 +TACAGAGAAATATACAACACTCACC 1 +TCAACTGGCAAGAATTTTTGAAAATT 1 +GAGAACTTTTAATCATTTTAC 1 +TATTATCATCTCGTTCTTCCTTCTC 1 +TTTTCATTTCTTCTTCAAATCCTTT 1 +TAGTCATACATACCTAATTATACATA 1 +ATTTTACTTCATCATTTTC 1 +TCTCTTTTATTTTTATCTTTCCTT 1 +GCCGGGGCGTGAGATGTCTGCATTA 1 +AGGATTTTTAAGCCCATATGTTTCC 1 +CAAGATATGAACAAAGCAAAGACAC 1 +CAACACATGACGCGACAATTCTTG 1 +CAAATAACAAACTGAATAAACGAAA 1 +TGAGAATGACTTCTTCACGATCTCTT 1 +TCTTATTATCATTTTTTTATCCCTT 1 +TCAAATGCAAATTGGATTTATGA 1 +CCTTACTCAACATACTTAATCATACTTA 1 +TAGACTTTCTACTCATTATTAC 1 +TGAAACTGAAACTAACATACAAAATATT 1 +AAAACCCGGACAAACCATCGGAGGA 1 +TACAGACAACACATACGGACTTAAGT 1 +TATTTCAGCAACAGACTAAGACTAA 1 +AACTTTAAATTTTTAATAACCTT 1 +TATTTATAAATTTTTTCTTGAGAC 1 +TTAATATGTAATTTCATACCTCAC 1 +CACAGACTGAGGCAGAAAAAACAA 1 +TAAAGAAGAAGAATTGATTTTAAT 1 +TACTGAAAACGGGCGCATATCAGTGG 1 +TCAGTCTTTTTTTCTCTCCTA 1 +TATAATTTTATTTTATATTTTCTCT 1 +NATTCTTACTCCATTTCAATTTACT 1 +TTGTAAAACATTCTTTCTCCTGAC 1 +TAATTACCATTGCTAACTATCCA 1 +TTCTTCCTTTTATCCTCTCTTAA 1 +TCTAAACACCCACGAAAATCTCTTAC 1 +AAAAACACACAGACACAAGCAGCAAT 1 +CGGACGGTATATTTTTTAATATAA 1 +TATGGAGAAACAGCGATATAAGTCA 1 +TACAACTAACATCCTTTCTTCTTCC 1 +AACTCTCTAATTTAACTTTGTGC 1 +TCCTGAGGACGAGGGGCGTTTAGC 1 +TATTTCCAACCTTCAACCTCAAATAA 1 +TGGACGGAGAACTGATAAGGGC 1 +TTTAAGACTTATGAGCTTG 1 +TTAAAGACGCAACAACTAACATT 1 +TAGGAACTTCATACCGGTCTC 1 +CGATATTTTCTCCTCTTACC 1 +GAGGATTAAAAGAACGGTTTATAA 1 +GAATGATCGCACCACCACCTCAACGTT 1 +TTTTCTTTACCCATCTTTACTTTCCC 1 +AAGACAACAATGACATATAAGACG 1 +TAATAATTTAAATAAATATAAATTT 1 +TACTGAAACAAGGAAACACAAGC 1 
+TCAGAAGAACAGAGAATTGATTTT 1 +CATACCTTAAATTATCTCTTTCTT 1 +TTCTTTTACTACATATTTTTTATTTTT 1 +AAAAAATATCTTTTTTAACTCGTGGCC 1 +TAACAAATAGAACGTTCTAATTTAAA 1 +TAGTTACCTTCATATCTCTCTTTA 1 +TAAAATTGTAATATTTAAATAATAT 1 +AAAAGGAAAAACAGAAAAATTGGG 1 +AGATGTTGATCTAAACTCTCCCA 1 +TACCTCTTTATTAACCTCCACCTCTA 1 +TTTCCGACAAATACACCATCTTC 1 +ACAAATCATAAATTTTTTTTTACT 1 +GACGAAACGCAACAACAAAATGGACG 1 +TACAAATTTTTTTTTCTTTCTTAT 1 +TACACCTCTTTTTACTTTTTTATT 1 +TATGGATTATTTCAAAATTTTTTTTT 1 +TTCTAGCACAACACGCACACATATA 1 +TAACTACTTTTACATTAATACTAA 1 +TCTCATCTTACAATTTTTTAAAACTT 1 +TTCTTGGACTACACATTTTTTATTGTTTTA 1 +TACACACTCATCAACCAAAGTACGTA 1 +TACTATATACTTCTTCAAATCACA 1 +TCAGAGTTCTACAGGTCCTACGATT 1 +TGATTTACTTACATTCTTTTTTT 1 +CCATATATGACTGACTCATTTCAC 1 +GAAGAGGAGGAGGAGTTTGTAAG 1 +AAAGACAAAAGAAATACAGGCACT 1 +TACAAGACTAAAACAAACGTGAAGT 1 +TAACGGAGCACGAGAACGAAGTGG 1 +CTTCTTTTACTACATATTTTTTATTTTTTTA 1 +TAATAAGAAACTGTTCAAACAATCCAC 1 +TGAGCGGAGAACCAGAGTTGATGAGC 1 +TATTATTTTTTTATTCCATTCATAT 1 +TTTATTACTTAGTCATAATTCCAA 1 +TTTTATATTTCCTTATATCTTTACTA 1 +AACGGGGAATAAGGGTTCG 1 +AATCTACAATTTCCATTACGACTCC 1 +CCGACCGAGCAAATAAACACAGGAACG 1 +TCCACAACAACTCTATCTAAAGCATT 1 +TTCTTGATAACGCATCTTCTACAT 1 +TGCTTGGACTACATATGGTTGAGGG 1 +CAGATTCACTGATTTTCTTACGCC 1 +TTTGTTTTTCATTTTTTTATCTTT 1 +CTATATTTTCTCTCTTACC 1 +TAACCTTGCAGAACTATACGATTCAAA 1 +TAAGAAACTGAGCTAACGCAATGTACC 1 +TTCTTTTACTACATATTTTTTATTTTTTTA 1 +TATCTATCTTTGATCTTCTTTTCA 1 +TAATAAATTATTAAATAAAAAAAAAA 1 +TTTTTTATCAATTTTCACCATTCAT 1 +TATTTCACTTTATACTTCCTTAA 1 +TAGTTTTAAATATTTCTTTTTTTC 1 +TTCTTTTACTACATATTGTTTATTTTTTTA 1 +GAGAATAAATATTTCAATGGTCTATTG 1 +CGATATTTTCTCCTCTTACCT 1 +CACGACTTTATTCTTTTTATCTCA 1 +TAGTGGACTTTAAAAAAAAAAAAAAAAAA 1 +CATAATATAAACTTATCTT 1 +ATGAAATTCGAACAATACGTC 1 +AACAACTGCAAACATCTACCACA 1 +TAAAAATAATTGTCTTTAATTTCA 1 +CGCAACCAGCAGCAACTCCTAGCAT 1 +ATTATTAATAAATTATTATAA 1 +CATTAATTCATCCATTTAAACTAA 1 +TCTTATTTTAATCTTCCAATTTC 1 +CTAGACAAGATGCTATAAATTTTAAA 1 +TGACCAAAGACAAACAAACAATAAATA 1 +TTTTTATCAATTTTCACCATTC 1 +TAAGTTTTTAATCATTTTTTTT 1 
+TAATCAAAAAACTCTTCATTTTTA 1 +TACAAACGGAACTTTCGTCATAA 1 +TTTTCTTTTTTTCATTTTCTCTTTTA 1 +TAGCCTTTACTAGGCTTTTTCTAA 1 +TTAGTATTAATCTTCACTTAA 1 +TAAAATAAACCAAAACCCAAAAAT 1