Mercurial > repos > mheinzl > variant_analyzer2
changeset 6:11a2a34f8a2b draft
planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
line wrap: on
line diff
--- a/mut2read.py Tue Oct 27 12:46:55 2020 +0000 +++ b/mut2read.py Mon Jan 18 09:49:15 2021 +0000 @@ -14,8 +14,7 @@ 0.2.1 2019-10-27 Gundula Povysil - ======= ========== ================= ================================ -USAGE: python mut2read.py DCS_Mutations.tabular DCS.bam Aligned_Families.tabular Interesting_Reads.fastq - tag_count_dict.json +USAGE: python mut2read.py DCS_Mutations.tabular DCS.bam Aligned_Families.tabular Interesting_Reads.fastq tag_count_dict.json """ import argparse @@ -25,12 +24,13 @@ import numpy as np import pysam +from cyvcf2 import VCF def make_argparser(): - parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file and creates a fastq file of reads of tags with mutation.') + parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file and creates a fastq file of reads of tags with mutation.') parser.add_argument('--mutFile', - help='TABULAR file with DCS mutations.') + help='VCF file with DCS mutations.') parser.add_argument('--bamFile', help='BAM file with aligned DCS reads.') parser.add_argument('--familiesFile', @@ -61,71 +61,67 @@ if os.path.isfile(file3) is False: sys.exit("Error: Could not find '{}'".format(file3)) - # read mut file - with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) - # read dcs bam file - # pysam.index(file2) +# pysam.index(file2) bam = pysam.AlignmentFile(file2, "rb") # get tags tag_dict = {} cvrg_dict = {} - if mut_array.shape == (1,13): - mut_array = mut_array.reshape((1, len(mut_array))) - - for m in range(len(mut_array[:, 0])): - print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) - chrom = mut_array[m, 1] - stop_pos = mut_array[m, 2].astype(int) + for variant in VCF(file1): + 
chrom = variant.CHROM + stop_pos = variant.start chrom_stop_pos = str(chrom) + "#" + str(stop_pos) - ref = mut_array[m, 9] - alt = mut_array[m, 10] + ref = variant.REF + alt = variant.ALT[0] +# nc = variant.format('NC') + ad = variant.format('AD') dcs_len = [] - - for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=100000000): + if len(ref) == len(alt): + for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): - if pileupcolumn.reference_pos == stop_pos - 1: - count_alt = 0 - count_ref = 0 - count_indel = 0 - count_n = 0 - count_other = 0 - count_lowq = 0 - print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), - "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) - for pileupread in pileupcolumn.pileups: - if not pileupread.is_del and not pileupread.is_refskip: - # query position is None if is_del or is_refskip is set. - nuc = pileupread.alignment.query_sequence[pileupread.query_position] - dcs_len.append(len(pileupread.alignment.query_sequence)) - if nuc == alt: - count_alt += 1 - tag = pileupread.alignment.query_name - if tag in tag_dict: - tag_dict[tag][chrom_stop_pos] = alt + if pileupcolumn.reference_pos == stop_pos: + count_alt = 0 + count_ref = 0 + count_indel = 0 + count_n = 0 + count_other = 0 + count_lowq = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + # query position is None if is_del or is_refskip is set. 
+ nuc = pileupread.alignment.query_sequence[pileupread.query_position] + dcs_len.append(len(pileupread.alignment.query_sequence)) + if nuc == alt: + count_alt += 1 + tag = pileupread.alignment.query_name + if tag in tag_dict: + tag_dict[tag][chrom_stop_pos] = alt + else: + tag_dict[tag] = {} + tag_dict[tag][chrom_stop_pos] = alt + elif nuc == ref: + count_ref += 1 + elif nuc == "N": + count_n += 1 + elif nuc == "lowQ": + count_lowq += 1 else: - tag_dict[tag] = {} - tag_dict[tag][chrom_stop_pos] = alt - elif nuc == ref: - count_ref += 1 - elif nuc == "N": - count_n += 1 - elif nuc == "lowQ": - count_lowq += 1 + count_other += 1 else: - count_other += 1 - else: - count_indel += 1 - dcs_median = np.median(np.array(dcs_len)) - cvrg_dict[chrom_stop_pos] = (count_ref, count_alt, dcs_median) + count_indel += 1 + dcs_median = np.median(np.array(dcs_len)) + cvrg_dict[chrom_stop_pos] = (count_ref, count_alt, dcs_median) - print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s, median length of DCS = %s\n" % - (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, - count_indel, count_lowq, dcs_median)) + print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s, median length of DCS = %s\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, + count_indel, count_lowq, dcs_median)) + else: + print("indels are currently not evaluated") bam.close() with open(json_file, "w") as f: @@ -153,3 +149,4 @@ if __name__ == '__main__': sys.exit(mut2read(sys.argv)) +
--- a/mut2read.xml Tue Oct 27 12:46:55 2020 +0000 +++ b/mut2read.xml Mon Jan 18 09:49:15 2021 +0000 @@ -1,10 +1,15 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.1" profile="19.01"> +<tool id="mut2read" name="DCS mutations to tags/reads:" version="2.0.0" profile="19.01"> <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description> <macros> <import>va_macros.xml</import> </macros> - <expand macro="requirements"/> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + <requirement type="package" version="0.11.6">cyvcf2</requirement> + </requirements> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && @@ -17,7 +22,7 @@ ]]> </command> <inputs> - <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/> + <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. 
See Help section below for a detailed explanation."/> <param name="file2" type="data" format="bam" label="DCS BAM File" optional="false" help="BAM file with aligned DCS reads."/> <param name="file3" type="data" format="tabular" label="Aligned Families File" optional="false" help="TABULAR file with aligned families."/> </inputs> @@ -27,25 +32,25 @@ </outputs> <tests> <test> - <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> - <param name="file2" value="DCS_test_data_VA.bam"/> - <param name="file3" value="Aligned_Families_test_data_VA.tabular"/> - <output name="output_fastq" file="Interesting_Reads_test_data_VA.fastq" lines_diff="136"/> - <output name="output_json" file="tag_count_dict_test_data_VA.json" lines_diff="2"/> + <param name="file1" value="FreeBayes_test.vcf" lines_diff="2"/> + <param name="file2" value="DCS_test.bam"/> + <param name="file3" value="Aligned_Families_test.tabular"/> + <output name="output_fastq" file="Interesting_Reads_test.fastq" lines_diff="136"/> + <output name="output_json" file="tag_count_dict_test.json" lines_diff="2"/> </test> </tests> <help> <![CDATA[ **What it does** -Takes a tabular file with mutations, a BAM file of aligned DCS reads, and a +Takes a VCF file with mutations, a BAM file of aligned DCS reads, and a tabular file with aligned families as input and prints all tags of reads that carry a mutation to a user specified output file and creates a fastq file of reads of tags with a mutation. **Input** -**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as -generated by the **Variant Annotator** tool. +**Dataset 1:** VCF file with duplex consesus sequence (DCS) mutations. E.g. +generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_. **Dataset 2:** BAM file of aligned DCS reads. This file can be obtained by the tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.
--- a/mut2sscs.py Tue Oct 27 12:46:55 2020 +0000 +++ b/mut2sscs.py Mon Jan 18 09:49:15 2021 +0000 @@ -27,12 +27,13 @@ import numpy as np import pysam +from cyvcf2 import VCF def make_argparser(): - parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') + parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') parser.add_argument('--mutFile', - help='TABULAR file with DCS mutations.') + help='VCR file with DCS mutations.') parser.add_argument('--bamFile', help='BAM file with aligned SSCS reads.') parser.add_argument('--outputJson', @@ -54,74 +55,77 @@ if os.path.isfile(file2) is False: sys.exit("Error: Could not find '{}'".format(file2)) - # 1. read mut file - with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) - - # 2 read SSCS bam file - # pysam.index(file2) + # read SSCS bam file +# pysam.index(file2) bam = pysam.AlignmentFile(file2, "rb") # get tags mut_pos_dict = {} ref_pos_dict = {} - if mut_array.shape == (1,13): - mut_array = mut_array.reshape((1, len(mut_array))) - for m in range(0, len(mut_array[:, 0])): - print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) - chrom = mut_array[m, 1] - stop_pos = mut_array[m, 2].astype(int) + for variant in VCF(file1): + chrom = variant.CHROM + stop_pos = variant.start chrom_stop_pos = str(chrom) + "#" + str(stop_pos) - ref = mut_array[m, 9] - alt = mut_array[m, 10] + ref = variant.REF + alt = variant.ALT[0] +# nc = variant.format('NC') + ad = variant.format('AD') + + if len(ref) == len(alt): - for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=1000000000): - if pileupcolumn.reference_pos == stop_pos - 1: - count_alt = 0 - count_ref = 0 - count_indel = 0 - 
print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), - "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) - for pileupread in pileupcolumn.pileups: - if not pileupread.is_del and not pileupread.is_refskip: - tag = pileupread.alignment.query_name - abba = tag[-2:] - # query position is None if is_del or is_refskip is set. - if pileupread.alignment.query_sequence[pileupread.query_position] == alt: - count_alt += 1 - if chrom_stop_pos in mut_pos_dict: - if abba in mut_pos_dict[chrom_stop_pos]: - mut_pos_dict[chrom_stop_pos][abba] += 1 + for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000): + if pileupcolumn.reference_pos == stop_pos: + count_alt = 0 + count_ref = 0 + count_indel = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + tag = pileupread.alignment.query_name + abba = tag[-2:] + # query position is None if is_del or is_refskip is set. 
+ if pileupread.alignment.query_sequence[pileupread.query_position] == alt: + count_alt += 1 + if chrom_stop_pos in mut_pos_dict: + if abba in mut_pos_dict[chrom_stop_pos]: + mut_pos_dict[chrom_stop_pos][abba] += 1 + else: + mut_pos_dict[chrom_stop_pos][abba] = 1 else: + mut_pos_dict[chrom_stop_pos] = {} mut_pos_dict[chrom_stop_pos][abba] = 1 - else: - mut_pos_dict[chrom_stop_pos] = {} - mut_pos_dict[chrom_stop_pos][abba] = 1 - elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: - count_ref += 1 - if chrom_stop_pos in ref_pos_dict: - if abba in ref_pos_dict[chrom_stop_pos]: - ref_pos_dict[chrom_stop_pos][abba] += 1 + if chrom_stop_pos not in ref_pos_dict: + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos][abba] = 0 + + elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: + count_ref += 1 + if chrom_stop_pos in ref_pos_dict: + if abba in ref_pos_dict[chrom_stop_pos]: + ref_pos_dict[chrom_stop_pos][abba] += 1 + else: + ref_pos_dict[chrom_stop_pos][abba] = 1 else: + ref_pos_dict[chrom_stop_pos] = {} ref_pos_dict[chrom_stop_pos][abba] = 1 else: - ref_pos_dict[chrom_stop_pos] = {} - ref_pos_dict[chrom_stop_pos][abba] = 1 - else: - count_indel += 1 + count_indel += 1 - print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % - (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) + print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) - # if mutation is in DCS file but not in SSCS, then set counts to NA - if chrom_stop_pos not in mut_pos_dict.keys(): - mut_pos_dict[chrom_stop_pos] = {} - mut_pos_dict[chrom_stop_pos]["ab"] = 0 - mut_pos_dict[chrom_stop_pos]["ba"] = 0 - ref_pos_dict[chrom_stop_pos] = {} - ref_pos_dict[chrom_stop_pos]["ab"] = 0 - ref_pos_dict[chrom_stop_pos]["ba"] = 0 + # if mutation is in DCS file but not in SSCS, then set counts to NA + if chrom_stop_pos not in 
mut_pos_dict.keys(): + mut_pos_dict[chrom_stop_pos] = {} + mut_pos_dict[chrom_stop_pos]["ab"] = 0 + mut_pos_dict[chrom_stop_pos]["ba"] = 0 + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos]["ab"] = 0 + ref_pos_dict[chrom_stop_pos]["ba"] = 0 + else: + print("indels are currently not evaluated") bam.close() # save counts @@ -131,3 +135,4 @@ if __name__ == '__main__': sys.exit(mut2sscs(sys.argv)) +
--- a/mut2sscs.xml Tue Oct 27 12:46:55 2020 +0000 +++ b/mut2sscs.xml Mon Jan 18 09:49:15 2021 +0000 @@ -1,10 +1,15 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.1" profile="19.01"> +<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="2.0.0" profile="19.01"> <description>Extracts all tags from the single stranded consensus sequence (SSCS) bam file that carry a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies</description> <macros> <import>va_macros.xml</import> </macros> - <expand macro="requirements"/> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + <requirement type="package" version="0.11.6">cyvcf2</requirement> + </requirements> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && @@ -15,7 +20,7 @@ ]]> </command> <inputs> - <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/> + <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. 
See Help section below for a detailed explanation."/> <param name="file2" type="data" format="bam" label="SSCS BAM File" optional="false" help="BAM file with aligned SSCS reads."/> </inputs> <outputs> @@ -23,31 +28,27 @@ </outputs> <tests> <test> - <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> - <param name="file2" value="SSCS_test_data_VA.bam"/> - <output name="output_json" file="SSCS_counts_test_data_VA.json" lines_diff="2"/> + <param name="file1" value="FreeBayes_test.vcf"/> + <param name="file2" value="SSCS_test.bam"/> + <output name="output_json" file="SSCS_counts_test.json" lines_diff="2"/> </test> </tests> <help> <![CDATA[ **What it does** -Takes a tabular file with DCS mutations and a BAM file of aligned SSCS reads +Takes a VCF file with DCS mutations and a BAM file of aligned SSCS reads as input and writes statistics about tags of reads that carry a mutation in the SSCS at the same position a mutation is called in the DCS to a user specified output file.. **Input** -**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as -generated by the **Variant Annotator** tool. +**Dataset 1:** VCF file with duplex consesus sequence (DCS) mutations. E.g. +generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_. **Dataset 2:** BAM file of aligned single stranded consensus sequence (SSCS) reads. This file can be obtained by the tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_. -**Dataset 3:** Tabular file with reads as produced by the -**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline -<https://doi.org/10.1186/s13059-016-1039-4>`_ - **Output** The output is a json file containing dictonaries with stats of tags that carry a mutation in the SSCS
--- a/read2mut.py Tue Oct 27 12:46:55 2020 +0000 +++ b/read2mut.py Mon Jan 18 09:49:15 2021 +0000 @@ -33,11 +33,13 @@ import numpy as np import pysam import xlsxwriter +from cyvcf2 import VCF + def make_argparser(): - parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') + parser = argparse.ArgumentParser(description='Takes a VCF file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') parser.add_argument('--mutFile', - help='TABULAR file with DCS mutations.') + help='VCF file with DCS mutations.') parser.add_argument('--bamFile', help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') parser.add_argument('--inputJson', @@ -45,7 +47,11 @@ parser.add_argument('--sscsJson', help='JSON file with SSCS counts collected by mut2sscs.py.') parser.add_argument('--outputFile', - help='Output xlsx file of mutation details.') + help='Output xlsx file with summary of mutations.') + parser.add_argument('--outputFile2', + help='Output xlsx file with allele frequencies of mutations.') + parser.add_argument('--outputFile3', + help='Output xlsx file with examples of the tier classification.') parser.add_argument('--thresh', type=int, default=0, help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') parser.add_argument('--phred', type=int, default=20, @@ -53,7 +59,11 @@ parser.add_argument('--trim', type=int, default=10, help='Integer threshold for assigning mutations at start and end of reads to lower tier. 
Default 10.') parser.add_argument('--chimera_correction', action="store_true", - help='Count chimeric variants and correct the variant frequencies.') + help='Count chimeric variants and correct the variant frequencies') + parser.add_argument('--softclipping_dist', type=int, default=15, + help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.') + parser.add_argument('--reads_threshold', type=float, default=1.0, + help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.') return parser @@ -71,10 +81,14 @@ json_file = args.inputJson sscs_json = args.sscsJson outfile = args.outputFile + outfile2 = args.outputFile2 + outfile3 = args.outputFile3 thresh = args.thresh phred_score = args.phred trim = args.trim chimera_correction = args.chimera_correction + thr = args.softclipping_dist + threshold_reads = args.reads_threshold if os.path.isfile(file1) is False: sys.exit("Error: Could not find '{}'".format(file1)) @@ -88,10 +102,9 @@ sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) if trim < 0: sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) + if thr <= 0: + sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr)) - with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) - # load dicts with open(json_file, "r") as f: (tag_dict, cvrg_dict) = json.load(f) @@ -103,77 +116,88 @@ # pysam.index(file2) bam = pysam.AlignmentFile(file2, "rb") - # 4. 
create mut_dict + # create mut_dict mut_dict = {} mut_read_pos_dict = {} mut_read_dict = {} reads_dict = {} - if mut_array.shape == (1, 13): - mut_array = mut_array.reshape((1, len(mut_array))) + mut_read_cigar_dict = {} + i = 0 + mut_array = [] - for m in range(0, len(mut_array[:, 0])): - print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) - chrom = mut_array[m, 1] - stop_pos = mut_array[m, 2].astype(int) + for count, variant in enumerate(VCF(file1)): + #if count == 2000: + # break + chrom = variant.CHROM + stop_pos = variant.start chrom_stop_pos = str(chrom) + "#" + str(stop_pos) - ref = mut_array[m, 9] - alt = mut_array[m, 10] - mut_dict[chrom_stop_pos] = {} - mut_read_pos_dict[chrom_stop_pos] = {} - reads_dict[chrom_stop_pos] = {} + ref = variant.REF + alt = variant.ALT[0] +# nc = variant.format('NC') + ad = variant.format('AD') + if len(ref) == len(alt): + mut_array.append([chrom, stop_pos, ref, alt]) + i += 1 + mut_dict[chrom_stop_pos] = {} + mut_read_pos_dict[chrom_stop_pos] = {} + reads_dict[chrom_stop_pos] = {} + mut_read_cigar_dict[chrom_stop_pos] = {} - for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=100000000): - if pileupcolumn.reference_pos == stop_pos - 1: - count_alt = 0 - count_ref = 0 - count_indel = 0 - count_n = 0 - count_other = 0 - count_lowq = 0 - n = 0 - print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), - "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) - for pileupread in pileupcolumn.pileups: - n += 1 - if not pileupread.is_del and not pileupread.is_refskip: - tag = pileupread.alignment.query_name - nuc = pileupread.alignment.query_sequence[pileupread.query_position] - phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 - if phred < phred_score: - nuc = "lowQ" - if tag not in mut_dict[chrom_stop_pos]: - mut_dict[chrom_stop_pos][tag] = {} - if nuc in mut_dict[chrom_stop_pos][tag]: - mut_dict[chrom_stop_pos][tag][nuc] += 1 + for 
pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): + if pileupcolumn.reference_pos == stop_pos: + count_alt = 0 + count_ref = 0 + count_indel = 0 + count_n = 0 + count_other = 0 + count_lowq = 0 + n = 0 + #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + n += 1 + if not pileupread.is_del and not pileupread.is_refskip: + tag = pileupread.alignment.query_name + nuc = pileupread.alignment.query_sequence[pileupread.query_position] + phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 + if phred < phred_score: + nuc = "lowQ" + if tag not in mut_dict[chrom_stop_pos]: + mut_dict[chrom_stop_pos][tag] = {} + if nuc in mut_dict[chrom_stop_pos][tag]: + mut_dict[chrom_stop_pos][tag][nuc] += 1 + else: + mut_dict[chrom_stop_pos][tag][nuc] = 1 + if tag not in mut_read_pos_dict[chrom_stop_pos]: + mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] + reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] + mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] + else: + mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) + reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) + mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) + if nuc == alt: + count_alt += 1 + if tag not in mut_read_dict: + mut_read_dict[tag] = {} + mut_read_dict[tag][chrom_stop_pos] = (alt, ref) + else: + mut_read_dict[tag][chrom_stop_pos] = (alt, ref) + elif nuc == ref: + count_ref += 1 + elif nuc == "N": + count_n += 1 + elif nuc == "lowQ": + count_lowq += 1 + else: + count_other += 1 else: - mut_dict[chrom_stop_pos][tag][nuc] = 1 - if tag not in mut_read_pos_dict[chrom_stop_pos]: - mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 - 
reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) - else: - mut_read_pos_dict[chrom_stop_pos][tag] = np.append( - mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) - reads_dict[chrom_stop_pos][tag] = np.append( - reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) - if nuc == alt: - count_alt += 1 - if tag not in mut_read_dict: - mut_read_dict[tag] = {} - mut_read_dict[tag][chrom_stop_pos] = (alt, ref) - else: - mut_read_dict[tag][chrom_stop_pos] = (alt, ref) - elif nuc == ref: - count_ref += 1 - elif nuc == "N": - count_n += 1 - elif nuc == "lowQ": - count_lowq += 1 - else: - count_other += 1 - else: - count_indel += 1 - print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) - + count_indel += 1 + + #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) + #else: + # print("indels are currently not evaluated") + mut_array = np.array(mut_array) for read in bam.fetch(until_eof=True): if read.is_unmapped: pure_tag = read.query_name[:-5] @@ -189,13 +213,17 @@ mut_dict[key][read.query_name][nuc] = 1 bam.close() - # 5. 
create pure_tags_dict + # create pure_tags_dict pure_tags_dict = {} for key1, value1 in sorted(mut_dict.items()): + if len(np.where(np.array(['#'.join(str(i) for i in z) + for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: + continue + i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] - ref = mut_array[i, 9] - alt = mut_array[i, 10] + for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + ref = mut_array[i, 2] + alt = mut_array[i, 3] pure_tags_dict[key1] = {} for key2, value2 in sorted(value1.items()): for key3, value3 in value2.items(): @@ -206,7 +234,7 @@ else: pure_tags_dict[key1][pure_tag] = 1 - # 6. create pure_tags_dict_short with thresh + # create pure_tags_dict_short with thresh if thresh > 0: pure_tags_dict_short = {} for key, value in sorted(pure_tags_dict.items()): @@ -215,16 +243,36 @@ else: pure_tags_dict_short = pure_tags_dict - # 7. output summary with threshold + # whole_array = [] + # for k in pure_tags_dict.values(): + # if len(k) != 0: + # keys = k.keys() + # if len(keys) > 1: + # for k1 in keys: + # whole_array.append(k1) + # else: + # whole_array.append(keys[0]) + + # output summary with threshold workbook = xlsxwriter.Workbook(outfile) + workbook2 = xlsxwriter.Workbook(outfile2) + workbook3 = xlsxwriter.Workbook(outfile3) ws1 = workbook.add_worksheet("Results") - ws2 = workbook.add_worksheet("Allele frequencies") - ws3 = workbook.add_worksheet("Tiers") + ws2 = workbook2.add_worksheet("Allele frequencies") + ws3 = workbook3.add_worksheet("Tiers") format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow + format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green + format22 = workbook2.add_format({'bg_color': '#FFC7CE'}) # red + format32 = workbook2.add_format({'bg_color': '#FACC2E'}) # yellow + + format13 = 
workbook3.add_format({'bg_color': '#BCF5A9'}) # green + format23 = workbook3.add_format({'bg_color': '#FFC7CE'}) # red + format33 = workbook3.add_format({'bg_color': '#FACC2E'}) # yellow + header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', 'read median length.ba', 'DCS median length', 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', @@ -244,9 +292,14 @@ counter_tier32 = 0 counter_tier41 = 0 counter_tier42 = 0 - #if chimera_correction: + # if chimera_correction: # counter_tier43 = 0 - counter_tier5 = 0 + counter_tier51 = 0 + counter_tier52 = 0 + counter_tier53 = 0 + counter_tier54 = 0 + counter_tier55 = 0 + counter_tier6 = 0 row = 1 tier_dict = {} @@ -257,14 +310,16 @@ chimeric_tag = {} if key1 in pure_tags_dict_short.keys(): i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] - ref = mut_array[i, 9] - alt = mut_array[i, 10] + for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + ref = mut_array[i, 2] + alt = mut_array[i, 3] dcs_median = cvrg_dict[key1][2] whole_array = pure_tags_dict_short[key1].keys() tier_dict[key1] = {} - values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] + values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), + ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), + ("tier 6", 0)] for k, v in values_tier_dict: tier_dict[key1][k] = v @@ -455,19 +510,38 @@ read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 - + cigars_dcs1 = cigars_dcs2 = cigars_dcs3 = cigars_dcs4 = [] + pos_read1 = pos_read2 = 
pos_read3 = pos_read4 = [] + end_read1 = end_read2 = end_read3 = end_read4 = [] if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): - read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) - read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) + read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])) + read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1'])) + cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'] + #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']) + pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1'] + #print(cigars_dcs1) + end_read1 = reads_dict[key1][key2[:-5] + '.ab.1'] if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): - read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) - read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) + read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])) + read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2'])) + cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2'] + pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2'] + end_read2 = reads_dict[key1][key2[:-5] + '.ab.2'] if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): - read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) - read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) + read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])) + read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1'])) + cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1'] + pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1'] + end_read3 = reads_dict[key1][key2[:-5] + '.ba.1'] if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): - read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) - read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) + read_pos4 = 
np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])) + read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2'])) + #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']) + cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] + + pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] + #print(cigars_dcs4) + end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] used_keys.append(key2[:-5]) counts_mut += 1 @@ -497,21 +571,225 @@ details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) - - + trimmed = False contradictory = False + softclipped_mutation_allMates = False + softclipped_mutation_oneOfTwoMates = False + softclipped_mutation_oneOfTwoSSCS = False + softclipped_mutation_oneMate = False + softclipped_mutation_oneMateOneSSCS = False + print() + print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) + dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] + dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = [] + ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False + ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False - if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant + # mate 1 - SSCS ab + softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] + ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads + + if any(ij is True for ij in softclipped_idx1): + softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] + softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) 
else -1 for string in cigars_dcs1] + softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] + dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] + dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] + + # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping + if any(ij is True for ij in softclipped_both_ends_idx1): + print(softclipped_both_ends_idx1) + for nr, indx in enumerate(softclipped_both_ends_idx1): + if indx: + if dist_start_read1[nr] <= dist_end_read1[nr]: + dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number + else: + dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number + ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads + ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads + print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1) + + # mate 1 - SSCS ba + softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] + ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads + if any(ij is True for ij in softclipped_idx4): + softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] + softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] + softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string 
in cigars_dcs4] + dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] + dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] + + # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping + if any(ij is True for ij in softclipped_both_ends_idx4): + print(softclipped_both_ends_idx4) + for nr, indx in enumerate(softclipped_both_ends_idx4): + if indx: + if dist_start_read4[nr] <= dist_end_read4[nr]: + dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number + else: + dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number + ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads + ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads + print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4) + + # mate 2 - SSCS ab + softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] + #print(sum(softclipped_idx2)) + ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads + if any(ij is True for ij in softclipped_idx2): + softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] + softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] + softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] + dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in 
zip(softclipped_start2, pos_read2)] + dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] + + # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping + if any(ij is True for ij in softclipped_both_ends_idx2): + print(softclipped_both_ends_idx2) + for nr, indx in enumerate(softclipped_both_ends_idx2): + if indx: + if dist_start_read2[nr] <= dist_end_read2[nr]: + dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number + else: + dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number + ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads + #print(ratio_dist_end2) + #print([True if x <= thr else False for x in ratio_dist_end2]) + ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads + print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2) + + # mate 2 - SSCS ba + softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] + ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads + if any(ij is True for ij in softclipped_idx3): + softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] + softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] + softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] + dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] + dist_end_read3 = 
[(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] + + # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping + if any(ij is True for ij in softclipped_both_ends_idx3): + print(softclipped_both_ends_idx3) + for nr, indx in enumerate(softclipped_both_ends_idx3): + if indx: + if dist_start_read3[nr] <= dist_end_read3[nr]: + dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh + else: + dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number + #print([True if x <= thr else False for x in dist_start_read3]) + ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads + ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads + print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3) + + if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & - all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): + all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): alt1ff = 0 alt4ff = 0 alt2ff = 0 alt3ff = 0 trimmed = False contradictory = True + # softclipping tiers + # information of both mates available --> all reads for both mates and SSCS are softclipped + elif (ratio1 & ratio4 & ratio2 & ratio3 & + (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & + all(float(ij) > 0. 
for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available + # if distance between softclipping and mutation is at start or end of the read smaller than threshold + softclipped_mutation_allMates = True + softclipped_mutation_oneOfTwoMates = False + softclipped_mutation_oneOfTwoSSCS = False + softclipped_mutation_oneMate = False + softclipped_mutation_oneMateOneSSCS = False + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False + print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates) + # information of both mates available --> only one mate softclipped + elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | + (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & + all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available + # if distance between softclipping and mutation is at start or end of the read smaller than threshold + softclipped_mutation_allMates = False + softclipped_mutation_oneOfTwoMates = True + softclipped_mutation_oneOfTwoSSCS = False + softclipped_mutation_oneMate = False + softclipped_mutation_oneMateOneSSCS = False + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False + print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates) + # information of both mates available --> only one mate softclipped + elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & + ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & + all(float(ij) > 0. 
for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available + # if distance between softclipping and mutation is at start or end of the read smaller than threshold + softclipped_mutation_allMates = False + softclipped_mutation_oneOfTwoMates = False + softclipped_mutation_oneOfTwoSSCS = True + softclipped_mutation_oneMate = False + softclipped_mutation_oneMateOneSSCS = False + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False + print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) + # information of one mate available --> all reads of one mate are softclipped + elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & + all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | + (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & + all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. 
for ij in [alt2ff, alt3ff]))): # all mates available + # if distance between softclipping and mutation is at start or end of the read smaller than threshold + #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) & + # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) | + # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & + # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): + softclipped_mutation_allMates = False + softclipped_mutation_oneOfTwoMates = False + softclipped_mutation_oneOfTwoSSCS = False + softclipped_mutation_oneMate = True + softclipped_mutation_oneMateOneSSCS = False + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False + print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate) + # information of one mate available --> only one SSCS is softclipped + elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & + (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | + (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & + (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. 
for ij in [alt2ff, alt3ff])))): # all mates available + # if distance between softclipping and mutation is at start or end of the read smaller than threshold + #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) | + # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) | + # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | + # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): + softclipped_mutation_allMates = False + softclipped_mutation_oneOfTwoMates = False + softclipped_mutation_oneOfTwoSSCS = False + softclipped_mutation_oneMate = False + softclipped_mutation_oneMateOneSSCS = True + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False + print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS) + else: if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): beg1 = total1new @@ -526,14 +804,14 @@ alt4ff = 0 alt4f = 0 trimmed = True - + if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): beg2 = total2new total2new = 0 alt2ff = 0 alt2f = 0 trimmed = True - + if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): beg3 = total3new total3new = 0 @@ -621,10 +899,35 @@ counter_tier42 += 1 tier_dict[key1]["tier 4.2"] += 1 + elif softclipped_mutation_allMates: + tier = "5.1" + counter_tier51 += 1 + tier_dict[key1]["tier 5.1"] += 1 + + elif softclipped_mutation_oneOfTwoMates: + tier = "5.2" + counter_tier52 += 1 + tier_dict[key1]["tier 5.2"] += 1 + + elif softclipped_mutation_oneOfTwoSSCS: + tier = "5.3" + counter_tier53 += 1 + tier_dict[key1]["tier 5.3"] += 1 + + elif softclipped_mutation_oneMate: + tier = "5.4" + counter_tier54 += 1 + tier_dict[key1]["tier 5.4"] += 1 + + elif softclipped_mutation_oneMateOneSSCS: + tier = "5.5" + counter_tier55 += 1 + 
tier_dict[key1]["tier 5.5"] += 1 + else: - tier = "5" - counter_tier5 += 1 - tier_dict[key1]["tier 5"] += 1 + tier = "6" + counter_tier6 += 1 + tier_dict[key1]["tier 6"] += 1 chrom, pos = re.split(r'\#', key1) var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) @@ -702,7 +1005,7 @@ if key_chimera in chimeric_tag.keys(): chimeric_tag[key_chimera].append(float(tier)) else: - chimeric_tag[key_chimera] = [float(tier)] + chimeric_tag[key_chimera] = [float(tier)] if (read_pos1 == -1): read_pos1 = read_len_median1 = None @@ -750,13 +1053,13 @@ if chimera_correction: header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', - 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', - 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') + 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', + 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') else: header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', - 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', - 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', - 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 
1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') + 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', + 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', + 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') ws2.write_row(0, 0, header_line2) row = 0 @@ -764,9 +1067,9 @@ for key1, value1 in sorted(tier_dict.items()): if key1 in pure_tags_dict_short.keys(): i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] - ref = mut_array[i, 9] - alt = mut_array[i, 10] + for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + ref = mut_array[i, 2] + alt = mut_array[i, 3] chrom, pos = re.split(r'\#', key1) ref_count = cvrg_dict[key1][0] alt_count = cvrg_dict[key1][1] @@ -782,6 +1085,8 @@ if len(used_tiers) > 1: cum = safe_div(sum(used_tiers), cvrg) cum_af.append(cum) + if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place + continue lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) if chimera_correction: chimeras_all = chimera_dict[key1][0] @@ -806,20 +1111,21 @@ lst = tuple(lst) ws2.write_row(row + 1, 0, lst) if chimera_correction: - ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) - ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) - ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) + 
ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) + ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) + ws2.conditional_format('V{}:AE{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AE{} V1:AE1'.format(row + 2, row + 2)}) else: - ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) - ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) - ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) + ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) + ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) + ws2.conditional_format('P{}:Y{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:Y{} P1:Y1'.format(row + 2, row + 2)}) row += 1 # sheet 3 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), - ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), - ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), 
("tier 4.1", counter_tier41), - ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] + ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), + ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), + ("tier 4.2", counter_tier42), ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), + ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6)] header = ("tier", "count") ws3.write_row(0, 0, header) @@ -839,7 +1145,21 @@ 'criteria': '=$A${}>="3"'.format(i + 2), 'format': format2}) - description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. 
base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] + description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), + ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), + ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), + ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), + ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), + ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), + ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), + ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. 
base) and minimal FS>=1 for both SSCS in at least one mate"), + ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), + ("Tier 5.1", "variants is close to softclipping in both mates"), + ("Tier 5.2", "variants is close to softclipping in one of the mates"), + ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"), + ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"), + ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"), + ("Tier 6", "remaining variants")] examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), @@ -899,20 +1219,21 @@ ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], - [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", + [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", ""), ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], - [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", + [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], + [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", 
"0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", ""), ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] - start_row = 15 + start_row = 20 ws3.write(start_row, 0, "Description of tiers with examples") ws3.write_row(start_row + 1, 0, header_line) row = 0 @@ -921,19 +1242,22 @@ ex = examples_tiers[i] for k in range(len(ex)): ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) - ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) + ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), - 'format': format3, + 'format': 
format33, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), - 'format': format2, + 'format': format23, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) row += 3 workbook.close() + workbook2.close() + workbook3.close() if __name__ == '__main__': sys.exit(read2mut(sys.argv)) +
--- a/read2mut.xml Tue Oct 27 12:46:55 2020 +0000 +++ b/read2mut.xml Mon Jan 18 09:49:15 2021 +0000 @@ -1,12 +1,16 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.5" profile="19.01"> +<tool id="read2mut" name="Call specific mutations in reads:" version="2.1.0" profile="19.01"> <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description> <macros> <import>va_macros.xml</import> </macros> - <expand macro="requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> <requirement type="package" version="1.1.0">xlsxwriter</requirement> - </expand> + <requirement type="package" version="0.11.6">cyvcf2</requirement> + </requirements> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && @@ -19,11 +23,15 @@ --phred '$phred' --trim '$trim' $chimera_correction + --softclipping_dist '$softclipping_dist' + --reads_threshold '$reads_threshold' --outputFile '$output_xlsx' + --outputFile2 '$output_xlsx2' + --outputFile3 '$output_xlsx3' ]]> </command> <inputs> - <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/> + <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. 
See Help section below for a detailed explanation."/> <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/> <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/> <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/> @@ -31,35 +39,43 @@ <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/> <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/> <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants and correct the variant frequencies."/> + <param name="softclipping_dist" type="integer" label="Distance between artifact and softclipping of the reads" min="1" value="15" help="Count a mutation as an artifact if it lies within this distance of the softclipped part of the reads. Default = 15."/> +<param name="reads_threshold" type="float" label="Minimum percentage of softclipped reads in a family" min="0.0" max="1.0" value="1.0" help="Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. 
Default: 1.0, means all reads of a family have to be softclipped."/> </inputs> <outputs> - <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX"/> + <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/> + <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/> + <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/> </outputs> <tests> <test> - <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> - <param name="file2" value="Interesting_Reads_test_data_VA.trim.bam"/> - <param name="file3" value="tag_count_dict_test_data_VA.json"/> - <param name="file4" value="SSCS_counts_test_data_VA.json"/> + <param name="file1" value="FreeBayes_test.vcf"/> + <param name="file2" value="Interesting_Reads_test.trim.bam"/> + <param name="file3" value="tag_count_dict_test.json"/> + <param name="file4" value="SSCS_counts_test.json"/> <param name="thresh" value="0"/> <param name="phred" value="20"/> <param name="trim" value="10"/> - <param name="chimera_correction" value="False"/> - <output name="output_xlsx" file="mutant_reads_summary_short_trim_test_data_VA.xlsx" decompress="true" lines_diff="10"/> + <param name="chimera_correction"/> + <param name="softclipping_dist" value="15"/> + <param name="reads_threshold" value="1.0"/> + <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/> + <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/> + <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/> </test> </tests> <help> <![CDATA[ **What it does** -Takes a tabular file with mutations, a BAM file of aligned raw reads, and JSON files +Takes a VCF file with mutations, a BAM file of aligned raw reads, and JSON files created by the tools **DCS mutations to tags/reads** and 
**DCS mutations to SSCS stats** as input and calculates frequencies and stats for DCS mutations based on information from the raw reads. **Input** -**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as -generated by the **Variant Annotator** tool. +**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations, e.g. +generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_. **Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_. @@ -74,7 +90,7 @@ **Output** -The output is an XLSX file containing frequencies stats for DCS mutations based +The output consists of three XLSX files containing frequency stats for DCS mutations based on information from the raw reads. In addition, a tier-based classification is provided based on the amount of support for a true variant call.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned_Families_test.tabular Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,41 @@ +GATAACCTTGCTTCGTGATTAATC ab 1 M01897:257:000000000-AYB6W:1:2112:28792:17250 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0, +GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1108:16316:3620 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902( +GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1118:22651:3876 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG 
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( +GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1118:5518:20674 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, +GATAACCTTGCTTCGTGATTAATC ab 2 M01897:257:000000000-AYB6W:1:2112:28792:17250 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1 +GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1108:16316:3620 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG 
GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82 +GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1118:22651:3876 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<<AD7??BF0??)8<0<BF?>>>FAA9A:,403>BF?2;B(46(4((((( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1101:14310:2734 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?<EGGGFFFD49FD=6>:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1112:4840:17845 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC 
GGGC6<,CFDG8FG,CF6C<FGGDD<FGGGFGGGFG<E8,6FCFC,77BF,CFGGGFFFGGFEF:<CECC7:F:@?DFCEGFFE8?EGGF<,+,44=7,B,C@DGC@7+?F8,7=D,A=>,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1118:8154:20084 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:2110:10849:23965 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1113:13084:11145 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC 
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1115:14952:6061 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F><F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2101:12835:23979 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2102:25716:13556 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC 
,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2107:10919:21008 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1101:14310:2734 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEE<EGGGGGGGGGGGGGF@FCCCEEGGCFFGGFGGGGGG:>EEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)<FF<<FF:61:<BFFB7??9::?FF:07<7)(.,,2<1(11(,3:>7:773(-766223:(( +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1112:4840:17845 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC 
FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=<FF9<F,C<7++=7+4@+@=FFFGGFA,CB,EF9@7F::3@F@BFCC7E=FC@FCCCFF<=FGA7:FGFG,37F9FCG7:3?>7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@<AF:))766:23)(731F>?>(41((4))8>7:0(--,-338(( +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1118:8154:20084 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEG<DCFGGFGGGGGGFGGGGEGGGFGGGGGCGCGGGFCCFGGGGGGGGEGGGGGGGGGGGGGGFEEGEGGG5>FGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF<FFFFFF???FFA>?B2>B<0<?78AFF1706>9B?AF:?:0(3139:FF<?0 +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:2110:10849:23965 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGEFGFGGGGGGGGGGGGGGGEGGGGGGGFGGFDGGGGGGGGGGDGGGGGGGGGGGGFGFF=EF9E7E?FFGFDFC?>GGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. 
+GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1113:13084:11145 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF><AFB>09BF?AFFF?:?>>9>?FF::0 +GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1115:14952:6061 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474<?FFFF?B?B0(:1:F?068>:79?28508?>>4<04>AA<09>0>F?:6<B?F0(4969<F:?:( +GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:2101:12835:23979 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCCCTTCCCATCTGGGTCCCCAACGGCCTCTCCTGCGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGTC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD7FEFBFGGGGGGGCFGGGFGGGGGCGEGGG=CFGGECBFCFACC7DDFGGC9FFGCCFFFFF<@F?FFEGGGGGG8:57ACC=@60;CC:7,CEEGCGG4CFCA<?C<<FGGAFCDC5:>6C?3C.76)0319:*4)57?F*5<?2=FFF:?328()7395?(,22)((79>?:7:93:B)1)21>99(489<1(((,751681(8-( +GATTGGATAACGTTGTGGCAATTG ba 1 
M01897:257:000000000-AYB6W:1:2102:25716:13556 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC ;,EACFFEGD6FFGGG8F<@<FEGGGGGGFCE7F@,EDG9<@:FCFCEF,CFGF>BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.<DEE:))4()-*-)*948*6:74).4;9?F:1((>0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) +CCTCCCGGCAGTGCGAAAATGTCA ab 1 M01897:257:000000000-AYB6W:1:1108:17396:7377 2:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( +CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1106:22053:22582 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0*7*:7*18*27:CFGD?>>7+CGG>?F:?4*7?FG6).-))7)/<BF0)6.)/--/)67.:F209304(((493(,:5-)(2;:<2).4(( +CCTCCCGGCAGTGCGAAAATGTCA ba 2 
M01897:257:000000000-AYB6W:1:1109:21342:10199 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9 +CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1111:28216:18792 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9 +CCTCCCGGCAGTGCGAAAATGTCA ab 2 M01897:257:000000000-AYB6W:1:1108:17396:7377 1:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF +CCTCCCGGCAGTGCGAAAATGTCA ba 1 
M01897:257:000000000-AYB6W:1:1106:22053:22582 2:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCACCAGGAGGCCTGGCGGGCCGGCAGCTCAGAACCTGATATCTACTTTCTGTTAGCTGTCGCTCGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGTGGGAGCCTAGCCCTTTCTTGGGGTGGCTGCGCAGGCGGCAGAGCGTCACAGCTGCTACAACCAGGATGAACAGGAAGAGCCCCACCCCGTC FCF<9C@F8E9@FGC,,,,<,CF<,C@B@CC@<F,,@F::FD+FC@@F,CFFEEDFGD:C=<<B?FF:E8,B,B,AC<FA8C44++B=>F7F?+A7FF+==<F+:+@7+AFB,8C:F**>CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( +CCTCCCGGCAGTGCGAAAATGTCA ba 1 M01897:257:000000000-AYB6W:1:1109:21342:10199 2:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT- GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399 +CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1106:12553:14962 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT-- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEEEGGGGGGGGFGGGGGGFGGFFGGD@9DFFFGGFFFFF7AFFGFFFFFFFFFFFFFFBFF6>FFFFBBB>FBBADBFFF((428?F<?F>:?:DFA:1:?FF7??F10(3<F96 +CCTAGTCTTTGATTGGCCACTTTT ab 1 
M01897:257:000000000-AYB6W:1:1106:15615:18803 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGCGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGTAC GGCGGGGGGGGFFFGGCFGFGFGGGD@@F7FGGDCC:EFCFFGG:@DGGGGFGGGGGGGEGGG9EFGGGGGGGGGGG,CEEGDGEGGGGGGFFGGGGCGGGFG<FFDGGFEG@EG7FDEGCEEDECGGCFGGGGGFCFF2CCEFCFGCF;FGFGFF5BCFFCFGGGGGG5CGC=EE<CGGGGG6CBFFEFF53@6CED>755>:DF>AFFGA=>FF>>09>09@6>>BFBF(3.116>F:0)-:1)37(:<?BFFF<)57?AA2:637?0,(4.10(:39(31( +CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1110:11692:17499 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGACGCCCACCCCGT-- GGGGG@EGGGGGGGGGFGGGGFGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDEGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFFGGDFFFGFFFGFFF<FFBFFGFFFFFDFFFFFFFFF6((49>>)4:D>F6:AFBBFFFFFF0(8,1<6?7(6<B:22::?(4999<F?BBF? +CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1110:21292:16434 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGCGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGTGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGTTGAACAGGAAGAAGCCCACCCCTT-- GFGGGGD8C@FGGFCFCFFGGGGGEE+@CCFFEDCDFCF@CFCFGGGGGGGFEDFFGGGDGF@FFGEGF<FGGCF7<+4,=BFCFGGGDGG:?<=EE:FGFD8F+:C+>FGACCCCFGGE7EGGGGGGGFGCCFF;@A<;E;<BFCCGG;>CFGFFFFG5:*=+588C8>57EEGGDEFGGGCFC?*9@FGGGB>)97)<=@F?/))3:BGFFFE:)5925?>326909>>6>54,7((-8)-8-71(--24641:B)47445270,(3124(.,(,:<>(. 
+CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:12553:14962 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGG--CCAGGCAGGGCCCCAAGCCCCTTGTCT-TGCAGCCGGGGGGGGGGCGGTGGGAGCCTAACAAGCGGGGCGGGGGGTTGGAGGCCTCCCCAAGTTCGGGGGTGGCTTCTTCCTGTTCATCCTTGGTGTGGGGGCTGTGACGCCTTTGCGGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG* ?8*;*;**:*;***2***2A***00+< C++0++;***:**:*****://:**;**0++*++2*/:E/*1**)))/)1)+*1**))9))**)/**)03>))8D)(8().5<*)-7))1)67)6/.8118((4(-,.()-(()(-)).(,- +CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:15615:18803 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGA--CCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCC-GC GGGGGFGGGGFGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGDEFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGCFGGGGGGGCFGEGFGEEGGGGGGGDGGDGGCFGGGGGGGDGGGGGGFGGGGGGGGGGG GDGGGGGGGFGGGGFGGGGGGGGGFGG9FFGGGGGGGGGCEGECGGCFCEEGGGGGGGGGGGCGGCEC3*:C>DG=FC<?CGGFFGFFFGFFGFFFFFG:BDFFF<:AFFFFFFFF4<4?BAFB:<BB??09>B0?B><:D243847 10 +CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1110:11692:17499 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGGGCCAGGCCAGGCCCCAACGCCCATGTCTTTGCAGCCGAGGGGGAGCTGGTTGGGGCTGACGAGGCGGGCAGTGGGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGGGGTGGCGGCTTGTACCCTCTTCC--- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8>5*;:8C8E@;?*:;88*2CE*8*++<C99+9+@C**88*:C*:?6<C*+1*:858C*;7/9E*1CGCCC0*)1)*+<C7C.5766<69=<))9*05>3/4;<31<2)9:4=).0))/69?<((213(7:960(,1.-))))(()-) 
+GATAAGCCAACTGCCATCTAGAAT ab 1 M01897:257:000000000-AYB6W:1:1105:25798:19415 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTACGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTTTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGFGGGGGGGGGGGGGGEFCFFGGDEGG8EGGGGFGGGGEGFEGFFG<EDGGGGGGGGGCC7FGGGGGGGFGG=>FFCDBF)7:>7FF:EF?<?FEE:@F@?6??F6>B01>BF;FFF*4(,2:24?FBBF>?F?FFBF0;B2:0(:??FF7:BF?03:2<BBFBFB?0 +GATAAGCCAACTGCCATCTAGAAT ba 2 M01897:257:000000000-AYB6W:1:2104:15100:19675 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTCGCGTGTCCCGAGCCAGC GGFGFGGDGGGGGGCGGGD@EFGGFFGEGGGGGGGFECDEDFFGGGFGGGGFGDEGDGGEGGGGGG:FGGG@CFECFGGGGGFFGGGGGGF<CGGEGGGEFGGGEE7FGFF,=B=DBBFFDDFFFGGGGGGGEGGG:><FEGCF:FGEFGGFFFGGGDGGFEGDFGGGFGGGFFGGGCGGGEGFGGGGFFCFFEDG57CGGCFFC6C*CEGG6:CGGG:6<C>>CEFDGB7B5/<<:9<>>><F279?FG<>>>:>:D(47:6<26)402346>2<>(-49??0 +GATAAGCCAACTGCCATCTAGAAT ab 2 M01897:257:000000000-AYB6W:1:1105:25798:19415 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFFFGGFFGDB9FFGFFFF0F?:?FFFFFFFFFFFFBF@FFBA?B9;9B9>BB>FFF>FF>><?4
--- a/test-data/Aligned_Families_test_data_VA.tabular Tue Oct 27 12:46:55 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -GATAACCTTGCTTCGTGATTAATC ab 1 M01897:257:000000000-AYB6W:1:2112:28792:17250 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0, -GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1108:16316:3620 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902( -GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1118:22651:3876 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG 
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( -GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1118:5518:20674 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, -GATAACCTTGCTTCGTGATTAATC ab 2 M01897:257:000000000-AYB6W:1:2112:28792:17250 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1 -GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1108:16316:3620 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG 
GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82 -GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1118:22651:3876 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<<AD7??BF0??)8<0<BF?>>>FAA9A:,403>BF?2;B(46(4((((( -GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1101:14310:2734 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?<EGGGFFFD49FD=6>:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( -GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1112:4840:17845 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC 
GGGC6<,CFDG8FG,CF6C<FGGDD<FGGGFGGGFG<E8,6FCFC,77BF,CFGGGFFFGGFEF:<CECC7:F:@?DFCEGFFE8?EGGF<,+,44=7,B,C@DGC@7+?F8,7=D,A=>,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( -GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1118:8154:20084 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 -GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:2110:10849:23965 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( -GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1113:13084:11145 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC 
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 -GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1115:14952:6061 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F><F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 -GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2101:12835:23979 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 -GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2102:25716:13556 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC 
,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( -GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2107:10919:21008 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 -GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1101:14310:2734 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEE<EGGGGGGGGGGGGGF@FCCCEEGGCFFGGFGGGGGG:>EEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)<FF<<FF:61:<BFFB7??9::?FF:07<7)(.,,2<1(11(,3:>7:773(-766223:(( -GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1112:4840:17845 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC 
FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=<FF9<F,C<7++=7+4@+@=FFFGGFA,CB,EF9@7F::3@F@BFCC7E=FC@FCCCFF<=FGA7:FGFG,37F9FCG7:3?>7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@<AF:))766:23)(731F>?>(41((4))8>7:0(--,-338(( -GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1118:8154:20084 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEG<DCFGGFGGGGGGFGGGGEGGGFGGGGGCGCGGGFCCFGGGGGGGGEGGGGGGGGGGGGGGFEEGEGGG5>FGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF<FFFFFF???FFA>?B2>B<0<?78AFF1706>9B?AF:?:0(3139:FF<?0 -GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:2110:10849:23965 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGEFGFGGGGGGGGGGGGGGGEGGGGGGGFGGFDGGGGGGGGGGDGGGGGGGGGGGGFGFF=EF9E7E?FFGFDFC?>GGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. 
-GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1113:13084:11145 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF><AFB>09BF?AFFF?:?>>9>?FF::0 -GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1115:14952:6061 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474<?FFFF?B?B0(:1:F?068>:79?28508?>>4<04>AA<09>0>F?:6<B?F0(4969<F:?:( -GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:2101:12835:23979 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCCCTTCCCATCTGGGTCCCCAACGGCCTCTCCTGCGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGTC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD7FEFBFGGGGGGGCFGGGFGGGGGCGEGGG=CFGGECBFCFACC7DDFGGC9FFGCCFFFFF<@F?FFEGGGGGG8:57ACC=@60;CC:7,CEEGCGG4CFCA<?C<<FGGAFCDC5:>6C?3C.76)0319:*4)57?F*5<?2=FFF:?328()7395?(,22)((79>?:7:93:B)1)21>99(489<1(((,751681(8-( -GATTGGATAACGTTGTGGCAATTG ba 1 
M01897:257:000000000-AYB6W:1:2102:25716:13556 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC ;,EACFFEGD6FFGGG8F<@<FEGGGGGGFCE7F@,EDG9<@:FCFCEF,CFGF>BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.<DEE:))4()-*-)*948*6:74).4;9?F:1((>0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) -CCTCCCGGCAGTGCGAAAATGTCA ab 1 M01897:257:000000000-AYB6W:1:1108:17396:7377 2:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( -CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1106:22053:22582 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0*7*:7*18*27:CFGD?>>7+CGG>?F:?4*7?FG6).-))7)/<BF0)6.)/--/)67.:F209304(((493(,:5-)(2;:<2).4(( -CCTCCCGGCAGTGCGAAAATGTCA ba 2 
M01897:257:000000000-AYB6W:1:1109:21342:10199 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9 -CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1111:28216:18792 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9 -CCTCCCGGCAGTGCGAAAATGTCA ab 2 M01897:257:000000000-AYB6W:1:1108:17396:7377 1:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF -CCTCCCGGCAGTGCGAAAATGTCA ba 1 
M01897:257:000000000-AYB6W:1:1106:22053:22582 2:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCACCAGGAGGCCTGGCGGGCCGGCAGCTCAGAACCTGATATCTACTTTCTGTTAGCTGTCGCTCGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGTGGGAGCCTAGCCCTTTCTTGGGGTGGCTGCGCAGGCGGCAGAGCGTCACAGCTGCTACAACCAGGATGAACAGGAAGAGCCCCACCCCGTC FCF<9C@F8E9@FGC,,,,<,CF<,C@B@CC@<F,,@F::FD+FC@@F,CFFEEDFGD:C=<<B?FF:E8,B,B,AC<FA8C44++B=>F7F?+A7FF+==<F+:+@7+AFB,8C:F**>CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( -CCTCCCGGCAGTGCGAAAATGTCA ba 1 M01897:257:000000000-AYB6W:1:1109:21342:10199 2:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT- GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399 -CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1106:12553:14962 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT-- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEEEGGGGGGGGFGGGGGGFGGFFGGD@9DFFFGGFFFFF7AFFGFFFFFFFFFFFFFFBFF6>FFFFBBB>FBBADBFFF((428?F<?F>:?:DFA:1:?FF7??F10(3<F96 -CCTAGTCTTTGATTGGCCACTTTT ab 1 
M01897:257:000000000-AYB6W:1:1106:15615:18803 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGCGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGTAC GGCGGGGGGGGFFFGGCFGFGFGGGD@@F7FGGDCC:EFCFFGG:@DGGGGFGGGGGGGEGGG9EFGGGGGGGGGGG,CEEGDGEGGGGGGFFGGGGCGGGFG<FFDGGFEG@EG7FDEGCEEDECGGCFGGGGGFCFF2CCEFCFGCF;FGFGFF5BCFFCFGGGGGG5CGC=EE<CGGGGG6CBFFEFF53@6CED>755>:DF>AFFGA=>FF>>09>09@6>>BFBF(3.116>F:0)-:1)37(:<?BFFF<)57?AA2:637?0,(4.10(:39(31( -CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1110:11692:17499 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGACGCCCACCCCGT-- GGGGG@EGGGGGGGGGFGGGGFGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDEGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFFGGDFFFGFFFGFFF<FFBFFGFFFFFDFFFFFFFFF6((49>>)4:D>F6:AFBBFFFFFF0(8,1<6?7(6<B:22::?(4999<F?BBF? -CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1110:21292:16434 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGCGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGTGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGTTGAACAGGAAGAAGCCCACCCCTT-- GFGGGGD8C@FGGFCFCFFGGGGGEE+@CCFFEDCDFCF@CFCFGGGGGGGFEDFFGGGDGF@FFGEGF<FGGCF7<+4,=BFCFGGGDGG:?<=EE:FGFD8F+:C+>FGACCCCFGGE7EGGGGGGGFGCCFF;@A<;E;<BFCCGG;>CFGFFFFG5:*=+588C8>57EEGGDEFGGGCFC?*9@FGGGB>)97)<=@F?/))3:BGFFFE:)5925?>326909>>6>54,7((-8)-8-71(--24641:B)47445270,(3124(.,(,:<>(. 
-CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:12553:14962 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGG--CCAGGCAGGGCCCCAAGCCCCTTGTCT-TGCAGCCGGGGGGGGGGCGGTGGGAGCCTAACAAGCGGGGCGGGGGGTTGGAGGCCTCCCCAAGTTCGGGGGTGGCTTCTTCCTGTTCATCCTTGGTGTGGGGGCTGTGACGCCTTTGCGGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG* ?8*;*;**:*;***2***2A***00+< C++0++;***:**:*****://:**;**0++*++2*/:E/*1**)))/)1)+*1**))9))**)/**)03>))8D)(8().5<*)-7))1)67)6/.8118((4(-,.()-(()(-)).(,- -CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:15615:18803 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGA--CCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCC-GC GGGGGFGGGGFGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGDEFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGCFGGGGGGGCFGEGFGEEGGGGGGGDGGDGGCFGGGGGGGDGGGGGGFGGGGGGGGGGG GDGGGGGGGFGGGGFGGGGGGGGGFGG9FFGGGGGGGGGCEGECGGCFCEEGGGGGGGGGGGCGGCEC3*:C>DG=FC<?CGGFFGFFFGFFGFFFFFG:BDFFF<:AFFFFFFFF4<4?BAFB:<BB??09>B0?B><:D243847 10 -CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1110:11692:17499 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGGGCCAGGCCAGGCCCCAACGCCCATGTCTTTGCAGCCGAGGGGGAGCTGGTTGGGGCTGACGAGGCGGGCAGTGGGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGGGGTGGCGGCTTGTACCCTCTTCC--- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8>5*;:8C8E@;?*:;88*2CE*8*++<C99+9+@C**88*:C*:?6<C*+1*:858C*;7/9E*1CGCCC0*)1)*+<C7C.5766<69=<))9*05>3/4;<31<2)9:4=).0))/69?<((213(7:960(,1.-))))(()-) 
-GATAAGCCAACTGCCATCTAGAAT ab 1 M01897:257:000000000-AYB6W:1:1105:25798:19415 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTACGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTTTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGFGGGGGGGGGGGGGGEFCFFGGDEGG8EGGGGFGGGGEGFEGFFG<EDGGGGGGGGGCC7FGGGGGGGFGG=>FFCDBF)7:>7FF:EF?<?FEE:@F@?6??F6>B01>BF;FFF*4(,2:24?FBBF>?F?FFBF0;B2:0(:??FF7:BF?03:2<BBFBFB?0 -GATAAGCCAACTGCCATCTAGAAT ba 2 M01897:257:000000000-AYB6W:1:2104:15100:19675 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTCGCGTGTCCCGAGCCAGC GGFGFGGDGGGGGGCGGGD@EFGGFFGEGGGGGGGFECDEDFFGGGFGGGGFGDEGDGGEGGGGGG:FGGG@CFECFGGGGGFFGGGGGGF<CGGEGGGEFGGGEE7FGFF,=B=DBBFFDDFFFGGGGGGGEGGG:><FEGCF:FGEFGGFFFGGGDGGFEGDFGGGFGGGFFGGGCGGGEGFGGGGFFCFFEDG57CGGCFFC6C*CEGG6:CGGG:6<C>>CEFDGB7B5/<<:9<>>><F279?FG<>>>:>:D(47:6<26)402346>2<>(-49??0 -GATAAGCCAACTGCCATCTAGAAT ab 2 M01897:257:000000000-AYB6W:1:1105:25798:19415 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFFFGGFFGDB9FFGFFFF0F?:?FFFFFFFFFFFFBF@FFBA?B9;9B9>BB>FFF>FF>><?4
--- a/test-data/DCS_Mutations_test_data_VA.tabular Tue Oct 27 12:46:55 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS -__NONE__ ACH_TDII_5regions 505 1 2208 0 0 2209 1 C A 0.00045 1.09465 -__NONE__ ACH_TDII_5regions 571 0 2817 0 1 2818 1 C T 0.00035 1.04139 -__NONE__ ACH_TDII_5regions 958 0 1 0 14667 14668 1 T C 7e-05 1.03624 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FreeBayes_test.vcf Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,65 @@ +##fileformat=VCFv4.2 +##fileDate=20201027 +##source=freeBayes v1.3.1-dirty +##reference=localref.fa +##contig=<ID=ACH_TDII_5regions,length=3171> +##phasing=none +##commandline="freebayes --region ACH_TDII_5regions:0..3171 --bam b_0.bam --fasta-reference localref.fa --vcf ./vcf_output/part_ACH_TDII_5regions:0..3171.vcf --min-coverage 1 --skip-coverage 0 --limit-coverage 0 -O -m 1 -q 0 -R 0 -Y 0 -e 1000 -F 0.0 -C 1 -G 1 --min-alternate-qsum 0" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> +##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> 
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> 
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> 
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=Split primitives,Number=0,Type=Flag,Description="The allele was parsed using vcfallelicprimitives."> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +ACH_TDII_5regions 505 . C A 21.9913 . AB=0.5;ABP=3.0103;AC=1;AF=0.5;AN=2;AO=1;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=5.18177;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=5.05096;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=93;QR=93;RO=1;RPL=0;RPP=5.18177;RPPR=5.18177;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=0;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:2:1,1:1:93:1:93:-5.39772,0,-5.39772 +ACH_TDII_5regions 571 . C T 21.9913 . AB=0.5;ABP=3.0103;AC=1;AF=0.5;AN=2;AO=1;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=5.18177;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=5.05096;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=93;QR=93;RO=1;RPL=1;RPP=5.18177;RPPR=5.18177;RPR=0;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=0;SRP=5.18177;SRR=1;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:2:1,1:1:93:1:93:-5.39772,0,-5.39772 +ACH_TDII_5regions 958 . T C 50.9778 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1;CIGAR=1X;DP=1;DPB=1;DPRA=0;EPP=5.18177;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=5.99146;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=93;QR=0;RO=0;RPL=0;RPP=5.18177;RPPR=0;RPR=1;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:1:0,1:0:0:1:93:-5.99978,-0.30103,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Interesting_Reads_test.fastq Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,124 @@ +@GATAACCTTGCTTCGTGATTAATC.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0, +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902( +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( +@GATAACCTTGCTTCGTGATTAATC.ba.2 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, +@GATAACCTTGCTTCGTGATTAATC.ab.2 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG ++ +FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1 +@GATAACCTTGCTTCGTGATTAATC.ba.1 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG ++ +GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82 +@GATAACCTTGCTTCGTGATTAATC.ba.1 
+GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG ++ +FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<<AD7??BF0??)8<0<BF?>>>FAA9A:,403>BF?2;B(46(4((((( +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG ++ +GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?<EGGGFFFD49FD=6>:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC ++ +GGGC6<,CFDG8FG,CF6C<FGGDD<FGGGFGGGFG<E8,6FCFC,77BF,CFGGGFFFGGFEF:<CECC7:F:@?DFCEGFFE8?EGGF<,+,44=7,B,C@DGC@7+?F8,7=D,A=>,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( +@GATTGGATAACGTTGTGGCAATTG.ab.1 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F><F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC ++ +,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( +@GATTGGATAACGTTGTGGCAATTG.ba.2 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEE<EGGGGGGGGGGGGGF@FCCCEEGGCFFGGFGGGGGG:>EEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)<FF<<FF:61:<BFFB7??9::?FF:07<7)(.,,2<1(11(,3:>7:773(-766223:(( +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=<FF9<F,C<7++=7+4@+@=FFFGGFA,CB,EF9@7F::3@F@BFCC7E=FC@FCCCFF<=FGA7:FGFG,37F9FCG7:3?>7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@<AF:))766:23)(731F>?>(41((4))8>7:0(--,-338(( +@GATTGGATAACGTTGTGGCAATTG.ab.2 
+GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEG<DCFGGFGGGGGGFGGGGEGGGFGGGGGCGCGGGFCCFGGGGGGGGEGGGGGGGGGGGGGGFEEGEGGG5>FGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF<FFFFFF???FFA>?B2>B<0<?78AFF1706>9B?AF:?:0(3139:FF<?0 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGEFGFGGGGGGGGGGGGGGGEGGGGGGGFGGFDGGGGGGGGGGDGGGGGGGGGGGGFGFF=EF9E7E?FFGFDFC?>GGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. 
+@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF><AFB>09BF?AFFF?:?>>9>?FF::0 +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474<?FFFF?B?B0(:1:F?068>:79?28508?>>4<04>AA<09>0>F?:6<B?F0(4969<F:?:( +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCCCTTCCCATCTGGGTCCCCAACGGCCTCTCCTGCGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGTC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD7FEFBFGGGGGGGCFGGGFGGGGGCGEGGG=CFGGECBFCFACC7DDFGGC9FFGCCFFFFF<@F?FFEGGGGGG8:57ACC=@60;CC:7,CEEGCGG4CFCA<?C<<FGGAFCDC5:>6C?3C.76)0319:*4)57?F*5<?2=FFF:?328()7395?(,22)((79>?:7:93:B)1)21>99(489<1(((,751681(8-( +@GATTGGATAACGTTGTGGCAATTG.ba.1 
+GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC ++ +;,EACFFEGD6FFGGG8F<@<FEGGGGGGFCE7F@,EDG9<@:FCFCEF,CFGF>BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.<DEE:))4()-*-)*948*6:74).4;9?F:1((>0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) +@CCTCCCGGCAGTGCGAAAATGTCA.ab.1 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC ++ +CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0*7*:7*18*27:CFGD?>>7+CGG>?F:?4*7?FG6).-))7)/<BF0)6.)/--/)67.:F209304(((493(,:5-)(2;:<2).4(( +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 
+CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9 +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9 +@CCTCCCGGCAGTGCGAAAATGTCA.ab.2 +CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF +@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 
+CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCACCAGGAGGCCTGGCGGGCCGGCAGCTCAGAACCTGATATCTACTTTCTGTTAGCTGTCGCTCGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGTGGGAGCCTAGCCCTTTCTTGGGGTGGCTGCGCAGGCGGCAGAGCGTCACAGCTGCTACAACCAGGATGAACAGGAAGAGCCCCACCCCGTC ++ +FCF<9C@F8E9@FGC,,,,<,CF<,C@B@CC@<F,,@F::FD+FC@@F,CFFEEDFGD:C=<<B?FF:E8,B,B,AC<FA8C44++B=>F7F?+A7FF+==<F+:+@7+AFB,8C:F**>CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( +@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 +CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT ++ +GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Interesting_Reads_test.trim.fastq Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,120 @@ +@GATAACCTTGCTTCGTGATTAATC.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCT ++ +GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4 +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902 +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(3 +@GATAACCTTGCTTCGTGATTAATC.ba.2 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGA ++ +FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9 +@GATAACCTTGCTTCGTGATTAATC.ab.2 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG ++ +FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1 +@GATAACCTTGCTTCGTGATTAATC.ba.1 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGG ++ +GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<? 
+@GATAACCTTGCTTCGTGATTAATC.ba.1 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGG ++ +FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3 +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCAC ++ +GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?<EGGGFFFD49FD=6>:CGFD>5 +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGG ++ +GGGC6<,CFDG8FG,CF6C<FGGDD<FGGGFGGGFG<E8,6FCFC,77BF,CFGGGFFFGGFEF:<CECC7:F:@?DFCEGFFE8?EGGF< +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAG ++ +GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7 +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTA ++ 
+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F><F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9 +@GATTGGATAACGTTGTGGCAATTG.ba.2 
+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAG ++ +,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**2 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEE<EGGGGGGGGGGGGGF@FCCCEEGGCFFGGFGGGGGG:>EEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)<FF<<FF:61:<BFFB7??9::?FF:07<7 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGC ++ 
+FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=<FF9<F,C<7++=7+4@+@=FFFGGFA,CB,EF9@7F::3@F@BFCC7E=FC@FCCCFF<=FGA7:FGFG,37F9FCG7:3?>7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@<AF:))7 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCG ++ +GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEG<DCFGGFGGGGGGFGGGGEGGGFGGGGGCGCGGGFCCFGGGGGGGGEGGGGGGGGGGGGGGFEEGEGGG5>FGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF<FFFFFF???FFA>?B2>B<0<?78AFF1706>9B?AF:?:0(3 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGEFGFGGGGGGGGGGGGGGGEGGGGGGGFGGFDGGGGGGGGGGDGGGGGGGGGGGGFGFF=EF9E7E?FFGFDFC?>GGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F? 
+@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF><AFB>09BF?AFFF?:?>>9>?FF::0 +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGA ++ +GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474<?FFFF?B?B0(:1:F?068>:79?28508?>>4<04>AA<09>0>F?:6<B?F0(4969<F:?: +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD7FEFBFGGGGGGGCFGGGFGGGGGCGEGGG=CFGGECBFCFACC7DDFGGC9FFGCCFFFFF<@F?FFEGGGGGG8:57ACC=@60;CC:7,CEEGCGG4CFCA<?C<<FGGAFCDC5:>6C?3C.76)03 +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCA ++ 
+;,EACFFEGD6FFGGG8F<@<FEGGGGGGFCE7F@,EDG9<@:FCFCEF,CFGF>BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5 +@CCTCCCGGCAGTGCGAAAATGTCA.ab.1 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCAT ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*: +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGAC ++ +CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0 +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9 +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC ++ 
+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9 +@CCTCCCGGCAGTGCGAAAATGTCA.ab.2 +CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF +@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 +CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCA ++ +GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Interesting_Reads_test.trim.log Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,31 @@ +GATAACCTTGCTTCGTGATTAATC.ab.1 264 0 264 20 +GATAACCTTGCTTCGTGATTAATC.ba.2 283 0 283 1 +GATAACCTTGCTTCGTGATTAATC.ba.2 263 0 263 21 +GATAACCTTGCTTCGTGATTAATC.ba.2 208 0 208 76 +GATAACCTTGCTTCGTGATTAATC.ab.2 284 0 284 0 +GATAACCTTGCTTCGTGATTAATC.ba.1 264 0 264 20 +GATAACCTTGCTTCGTGATTAATC.ba.1 214 0 214 70 +GATTGGATAACGTTGTGGCAATTG.ab.1 185 0 185 99 +GATTGGATAACGTTGTGGCAATTG.ab.1 91 0 91 193 +GATTGGATAACGTTGTGGCAATTG.ab.1 205 0 205 79 +GATTGGATAACGTTGTGGCAATTG.ab.1 233 0 233 51 +GATTGGATAACGTTGTGGCAATTG.ba.2 284 0 284 0 +GATTGGATAACGTTGTGGCAATTG.ba.2 284 0 284 0 +GATTGGATAACGTTGTGGCAATTG.ba.2 237 0 237 47 +GATTGGATAACGTTGTGGCAATTG.ba.2 205 0 205 79 +GATTGGATAACGTTGTGGCAATTG.ba.2 284 0 284 0 +GATTGGATAACGTTGTGGCAATTG.ab.2 252 0 252 32 +GATTGGATAACGTTGTGGCAATTG.ab.2 247 0 247 37 +GATTGGATAACGTTGTGGCAATTG.ab.2 275 0 275 9 +GATTGGATAACGTTGTGGCAATTG.ab.2 282 0 282 2 +GATTGGATAACGTTGTGGCAATTG.ba.1 284 0 284 0 +GATTGGATAACGTTGTGGCAATTG.ba.1 283 0 283 1 +GATTGGATAACGTTGTGGCAATTG.ba.1 207 0 207 77 +GATTGGATAACGTTGTGGCAATTG.ba.1 196 0 196 88 +CCTCCCGGCAGTGCGAAAATGTCA.ab.1 158 0 158 126 +CCTCCCGGCAGTGCGAAAATGTCA.ba.2 193 0 193 91 +CCTCCCGGCAGTGCGAAAATGTCA.ba.2 284 0 284 0 +CCTCCCGGCAGTGCGAAAATGTCA.ba.2 284 0 284 0 +CCTCCCGGCAGTGCGAAAATGTCA.ab.2 283 0 283 0 +CCTCCCGGCAGTGCGAAAATGTCA.ba.1 0 0 0 0 +CCTCCCGGCAGTGCGAAAATGTCA.ba.1 277 0 277 6
--- a/test-data/Interesting_Reads_test_data_VA.fastq Tue Oct 27 12:46:55 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -@GATAACCTTGCTTCGTGATTAATC.ab.1 -CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0, -@GATAACCTTGCTTCGTGATTAATC.ba.2 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902( -@GATAACCTTGCTTCGTGATTAATC.ba.2 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( -@GATAACCTTGCTTCGTGATTAATC.ba.2 
-CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, -@GATAACCTTGCTTCGTGATTAATC.ab.2 -GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG -+ -FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1 -@GATAACCTTGCTTCGTGATTAATC.ba.1 -GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG -+ -GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82 -@GATAACCTTGCTTCGTGATTAATC.ba.1 
-GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG -+ -FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<<AD7??BF0??)8<0<BF?>>>FAA9A:,403>BF?2;B(46(4((((( -@GATTGGATAACGTTGTGGCAATTG.ab.1 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG -+ -GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?<EGGGFFFD49FD=6>:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( -@GATTGGATAACGTTGTGGCAATTG.ab.1 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC -+ -GGGC6<,CFDG8FG,CF6C<FGGDD<FGGGFGGGFG<E8,6FCFC,77BF,CFGGGFFFGGFEF:<CECC7:F:@?DFCEGFFE8?EGGF<,+,44=7,B,C@DGC@7+?F8,7=D,A=>,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( -@GATTGGATAACGTTGTGGCAATTG.ab.1 
-CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 -@GATTGGATAACGTTGTGGCAATTG.ab.1 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( -@GATTGGATAACGTTGTGGCAATTG.ba.2 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 -@GATTGGATAACGTTGTGGCAATTG.ba.2 
-CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F><F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 -@GATTGGATAACGTTGTGGCAATTG.ba.2 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 -@GATTGGATAACGTTGTGGCAATTG.ba.2 -CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC -+ -,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( -@GATTGGATAACGTTGTGGCAATTG.ba.2 
-CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 -@GATTGGATAACGTTGTGGCAATTG.ab.2 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEE<EGGGGGGGGGGGGGF@FCCCEEGGCFFGGFGGGGGG:>EEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)<FF<<FF:61:<BFFB7??9::?FF:07<7)(.,,2<1(11(,3:>7:773(-766223:(( -@GATTGGATAACGTTGTGGCAATTG.ab.2 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC -+ -FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=<FF9<F,C<7++=7+4@+@=FFFGGFA,CB,EF9@7F::3@F@BFCC7E=FC@FCCCFF<=FGA7:FGFG,37F9FCG7:3?>7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@<AF:))766:23)(731F>?>(41((4))8>7:0(--,-338(( -@GATTGGATAACGTTGTGGCAATTG.ab.2 
-GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC -+ -GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEG<DCFGGFGGGGGGFGGGGEGGGFGGGGGCGCGGGFCCFGGGGGGGGEGGGGGGGGGGGGGGFEEGEGGG5>FGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF<FFFFFF???FFA>?B2>B<0<?78AFF1706>9B?AF:?:0(3139:FF<?0 -@GATTGGATAACGTTGTGGCAATTG.ab.2 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGEFGFGGGGGGGGGGGGGGGEGGGGGGGFGGFDGGGGGGGGGGDGGGGGGGGGGGGFGFF=EF9E7E?FFGFDFC?>GGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. 
-@GATTGGATAACGTTGTGGCAATTG.ba.1 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF><AFB>09BF?AFFF?:?>>9>?FF::0 -@GATTGGATAACGTTGTGGCAATTG.ba.1 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC -+ -GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474<?FFFF?B?B0(:1:F?068>:79?28508?>>4<04>AA<09>0>F?:6<B?F0(4969<F:?:( -@GATTGGATAACGTTGTGGCAATTG.ba.1 -GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCCCTTCCCATCTGGGTCCCCAACGGCCTCTCCTGCGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGTC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD7FEFBFGGGGGGGCFGGGFGGGGGCGEGGG=CFGGECBFCFACC7DDFGGC9FFGCCFFFFF<@F?FFEGGGGGG8:57ACC=@60;CC:7,CEEGCGG4CFCA<?C<<FGGAFCDC5:>6C?3C.76)0319:*4)57?F*5<?2=FFF:?328()7395?(,22)((79>?:7:93:B)1)21>99(489<1(((,751681(8-( -@GATTGGATAACGTTGTGGCAATTG.ba.1 
-GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC -+ -;,EACFFEGD6FFGGG8F<@<FEGGGGGGFCE7F@,EDG9<@:FCFCEF,CFGF>BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.<DEE:))4()-*-)*948*6:74).4;9?F:1((>0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) -@CCTCCCGGCAGTGCGAAAATGTCA.ab.1 -CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( -@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 -CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC -+ -CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0*7*:7*18*27:CFGD?>>7+CGG>?F:?4*7?FG6).-))7)/<BF0)6.)/--/)67.:F209304(((493(,:5-)(2;:<2).4(( -@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 
-CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9 -@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 -CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9 -@CCTCCCGGCAGTGCGAAAATGTCA.ab.2 -CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT -+ -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF -@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 
-CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCACCAGGAGGCCTGGCGGGCCGGCAGCTCAGAACCTGATATCTACTTTCTGTTAGCTGTCGCTCGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGTGGGAGCCTAGCCCTTTCTTGGGGTGGCTGCGCAGGCGGCAGAGCGTCACAGCTGCTACAACCAGGATGAACAGGAAGAGCCCCACCCCGTC -+ -FCF<9C@F8E9@FGC,,,,<,CF<,C@B@CC@<F,,@F::FD+FC@@F,CFFEEDFGD:C=<<B?FF:E8,B,B,AC<FA8C44++B=>F7F?+A7FF+==<F+:+@7+AFB,8C:F**>CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( -@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 -CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT -+ -GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SSCS_counts_test.json Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,1 @@ +[{"ACH_TDII_5regions#570": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#504": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#957": {"ab": 1, "ba": 1}}, {"ACH_TDII_5regions#570": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#504": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#957": {"ab": 1}}] \ No newline at end of file
--- a/test-data/SSCS_counts_test_data_VA.json Tue Oct 27 12:46:55 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -[{"ACH_TDII_5regions#505": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1, "ba": 1}}, {"ACH_TDII_5regions#505": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1}}] \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fasta Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,2 @@ +>ACH_TDII_5regions +GCGGAGGGCCCTCAGCCGCGTGGCGGTGACCAAGTTGGCGGTGGCTGAGGAGTTGGTGGTGGCGGCGTTTTCCTTGCAGCGGCTGGATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGacggcgggcgctaacaccaccgacaaggagctagaggttctctccttgcacaacgtcacctttgaggacgccggggagtacacctgcctggcgggcaattctattgggttttctcatcactctgcgtggctggtggtgctgccagGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGCGCCTGGTGGCGGTGTGGGACTGGCTGGCTCTGCTGGGCTCCTTCTCTCCAGGGTCTGGCCCTCTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGccgaggaggagctggtggaggctgacgaggcgggcagtgtgtatgcaggcatcctcagctacggggtgggcttcttcctgttcatcctggtggtggcggctgtgacgctctgccgcctgcgcagcccccccaagaaaggcctgggctcccccaccgtgcacaagatctcccgcttcccgctcaagcgacagGTAACAGAAAGTAGATACCAGGTTCTGAGCTGCCTGCCCGCCAGGCCTCCTGGAGCCCCACCTCGGCCCACGCTGGTCCTGGGCTGTGTGAGCCCTCTCTGCAGCCAGGCGGGCTCCCCTCTCCTCGTCTCTGCTCACCATGTAGAGCCTAGGGTACTTTGGGGCACGAAACATTCTAAAAATCTTCATTCAATGCTGGTGGAAGTCAGAACGCCCCCCCTTCTGGCCCAGCACTGACCCCCGGCTGTACCTCCACGCCCTGTCGCCCACGCGGCGCCAACCTGCCCCTGCTGACCCAAGCAGgtgtccctggagtccaacgcgtccatgagctccaacacaccactggtgcgcatcgcaaggctgtcctcaggggagggccccacgctggccaatgtctccgagctcgagctgcctgccgaccccaaatgggagctgtctcgggcccgGTCAGTGGTGCTGAGGGCCAGCGTTGGCTGTAGGGGGCTTGGTGGTGGGGGTGAAACAGCCACCAGTCAGAGGCCCGGCTGGGTTTAGGGGCCGTCAGGGATGTGGCGGATGTTGGGTGTGGCTGGGGTTCTGTGGAGATGCTCCTGGGACGGGTGTATGGCAGGGACTGCCCCTCTCAAGGTGCCCTGTCTGGAGGGGCAGCAAGGGCGGGAGGCTGTGGGTGACACTCTTCGTCCTTACGAGCAGGCTGTAGGGGGAGCATGGAGGGCTTCCTGGAGGTGGTGGCTCTGGGCCTCAAGGGCTGGGCCAGGCTGGGGTGGGGACCGTGGTGGGCTGAGAGTGGGCGAGTTTGCACACTCATGGTCCCTCTGCCTCCACTGCCAGgctgaccctgggcaagccccttggggagg
gctgcttcggccaggtggtcatggcggaggccatcggcattgacaaggaccgggccgccaagcctgtcaccgtagccgtgaagatgctgaaagGTGAGGAGGGGGCGGCCAGGGGTGCAGAGCAGGGCTGGGGGCGCCGCCGCCGCCTGACACAGGCCCCCCGCTCCGTGCACAGacgatgccactgacaaggacctgtcggacctggtgtctgagatggagatgatgaagatgatcgggaaacacaaaaacatcatcaacctgctgggcgcctgcacgcagggcgGTAGGTGCGGTAGCGGCGGTGGTGCCGGCTGGGCGGCCCTCCTGGGCCTGGCAGCCCGTCTGAGGAGCCCGTGTCCCCAGggcccctgtacgtgctggtggagtacgcggccaagggtaacctgcgggagtttctgcgggcgcggcggcccccgggcctggactactccttcgacacctgcaagccgcccgaggagcagctcaccttcaaggacctggtgtcctgtgcctaccaggtggcccggggcatggagtacttggcctcccagaagGTGGGCAGGGCGGCAGGTGTGGGTGGAGTAGGCTGGGCCCTGCCCTGAGATGCTGGGAGCAGCGGGGAGAGGTGGAGAGGCTTCAGCCCTGCCTCCCACCCCTTCCCCAGtgcatccacagggacctggctgcccgcaatgtgctggtgaccgaggacaacgtgatgaagatcgcagacttcgggctggcccgggacgtgcacaacctcgactactacaagaagacgaccaacGTGAGCCCGGCCCTGGGGTGCGGGGGTGGGGGTCATGCCAGTAGGACGCCTGGCGCCAACACCGCCTTCCCACACCCTCCCAGggccggctgcccgtgaagtggatggcgcctgaggccttgtttgaccgagtctacactcaccagagtgacgtGTACGTGTCCTGCAGAGCTCAGGCTTCAGGGGTGGAGGCGGGAACTGGGCAGAGCCAGGACCCCAGCTGCAGTCCCCAGGCCTGTGCCCTGGAGCTCCTGGGTGTGGTTTCTACCCCTCCCTGGGGGCAGCAGCGCAGCCCTGGCCTATTCCCCTGGTGCCCGCCCAGGTGTCTGTCCTGGGAGTCTCAGGACAGCCTGACCTCACCTTCCCCTGCAGctggtcctttggggtcctgctctgggagatcttcacgctggggggctccccgtaccccggcatccctgtggaggagctcttcaagctgctgaaggagggccaccgcatggacaagcccgccaactgcacacacgacctGTGAGTGGCATCCCTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tag_count_dict_test.json Mon Jan 18 09:49:15 2021 +0000 @@ -0,0 +1,1 @@ +[{"GATAACCTTGCTTCGTGATTAATC": {"ACH_TDII_5regions#504": "A"}, "GATTGGATAACGTTGTGGCAATTG": {"ACH_TDII_5regions#570": "T"}, "CCTCCCGGCAGTGCGAAAATGTCA": {"ACH_TDII_5regions#957": "C"}}, {"ACH_TDII_5regions#570": [1, 1, 143.0], "ACH_TDII_5regions#504": [1, 1, 173.0], "ACH_TDII_5regions#957": [0, 1, 195.0]}] \ No newline at end of file
--- a/test-data/tag_count_dict_test_data_VA.json Tue Oct 27 12:46:55 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -[{"GATAACCTTGCTTCGTGATTAATC": {"ACH_TDII_5regions#505": "A"}, "GATTGGATAACGTTGTGGCAATTG": {"ACH_TDII_5regions#571": "T"}, "CCTCCCGGCAGTGCGAAAATGTCA": {"ACH_TDII_5regions#958": "C"}}, {"ACH_TDII_5regions#505": [1, 1, 173.0], "ACH_TDII_5regions#571": [1, 1, 143.0], "ACH_TDII_5regions#958": [0, 1, 195.0]}] \ No newline at end of file
--- a/va_macros.xml Tue Oct 27 12:46:55 2020 +0000 +++ b/va_macros.xml Mon Jan 18 09:49:15 2021 +0000 @@ -1,20 +1,13 @@ <macros> <xml name="citation"> - <citations> - <citation type="bibtex"> -@misc{duplex, - author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, - year = {2019}, - title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} - } - </citation> - </citations> - </xml> - <xml name="requirements"> - <requirements> - <requirement type="package" version="3.1.2">matplotlib</requirement> - <requirement type="package" version="0.15">pysam</requirement> - <yield/> - </requirements> - </xml> -</macros> + <citations> + <citation type="bibtex"> + @misc{duplex, + author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, + year = {2019}, + title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} + } + </citation> + </citations> +</xml> +</macros> \ No newline at end of file