Mercurial > repos > iuc > medaka_variant_pipeline
changeset 10:7623e5888be9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 0faf0ade3f13d7c78d93869823ea9fdf25c21b13"
author | iuc |
---|---|
date | Thu, 22 Apr 2021 20:24:49 +0000 |
parents | 336b3def9b2b |
children | 11fedf536104 |
files | annotateVCF.py convert_VCF_info_fields.py macros.xml medaka_variant.xml test-data/all_fasta.loc test-data/bwa-mem-mt-genome.fa test-data/bwa-mem-mt-genome.fa.fai |
diffstat | 7 files changed, 20 insertions(+), 664 deletions(-) [+] |
line wrap: on
line diff
--- a/annotateVCF.py Mon Mar 29 20:06:01 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,409 +0,0 @@ -#!/usr/bin/env python3 - -# Takes in VCF file and a samtools mpileup output file -# Fills in annotation for the VCF file including AF, DP -# SB, and DP4 -# -# Usage statement: -# python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf -# -# Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf) - -# 08/24/2020 - Nathan P. Roach, natproach@gmail.com - -import sys -from math import isnan, log10 - -from scipy.stats import fisher_exact - - -def pval_to_phredqual(pval): - return int(round(-10. * log10(pval))) - - -def parseSimpleSNPpileup(fields, ref_base, alt_base): - base_to_idx = { - 'A': 0, - 'a': 0, - 'T': 1, - 't': 1, - 'C': 2, - 'c': 2, - 'G': 3, - 'g': 3 - } - - base_to_idx_stranded = { - 'A': 0, - 'T': 1, - 'C': 2, - 'G': 3, - 'a': 4, - 't': 5, - 'c': 6, - 'g': 7 - } - ref_base2 = fields[2] - counts = [0, 0, 0, 0] - stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0] - ref_idx = base_to_idx[fields[2]] - dp = int(fields[3]) - carrot_flag = False - ins_flag = False - ins_str = "" - ins_len = 0 - insertion = "" - del_flag = False - del_str = "" - del_len = 0 - deletion = "" - # dollar_flag = False - for base in fields[4]: - if carrot_flag: - carrot_flag = False - continue - if ins_len > 0: - insertion += base - ins_len -= 1 - continue - if del_len > 0: - deletion += base - del_len -= 1 - continue - if ins_flag: - if base.isdigit(): - ins_str += base - else: - ins_len = int(ins_str) - 1 - ins_str = "" - insertion = base - ins_flag = False - elif del_flag: - if base.isdigit(): - del_str += base - else: - del_len = int(del_str) - 1 - del_str = "" - deletion = base - del_flag = False - else: - if base == '^': - carrot_flag = True - continue - elif base == '$': - continue - elif base == '+': - ins_flag = True - elif base == '-': - del_flag = True - elif base == '.': - counts[ref_idx] += 1 - stranded_counts[base_to_idx_stranded[ref_base2]] += 1 - elif base == ',': - counts[ref_idx] += 1 - stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1 - elif base == 'N' or base == 'n': - continue - elif base == '*': - continue - else: - counts[base_to_idx[base]] += 1 - stranded_counts[base_to_idx_stranded[base]] += 1 - if sum(counts) == 0: - af = float("nan") - else: - af = float(counts[base_to_idx[alt_base]]) / float(sum(counts)) - if float(sum(stranded_counts[0:4])) == 0: - faf = float("nan") - else: - faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4])) - if float(sum(stranded_counts[4:])) == 0: - raf = float("nan") - else: - raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:])) - dp4 = [stranded_counts[base_to_idx_stranded[ref_base]], - stranded_counts[base_to_idx_stranded[ref_base.lower()]], - stranded_counts[base_to_idx_stranded[alt_base]], - stranded_counts[base_to_idx_stranded[alt_base.lower()]]] - return (dp, af, faf, raf, dp4) - - -def parseIndelPileup(fields, ref_base, alt_base): - counts = [0, 0, 0, 0, 0, 0, 0, 0, 0] # indel ref match, indel fwd ref match, indel rev ref match, indel alt match, indel fwd alt match, indel rev alt match, other, other fwd, other rev - ref_base2 = fields[2] - - carrot_flag = False - ins_flag = False - ins_str = "" - ins_len = 0 - del_flag = False - del_str = "" - del_len = 0 - first_iter = True - forward_flag = False - last_seq = "" - last_seq_code = 'b' - for base in fields[4]: - if ins_flag: - if base.isdigit(): - ins_str += base - else: - ins_len = int(ins_str) - ins_flag = False - if del_flag: - if base.isdigit(): - del_str += base - else: - del_len = int(del_str) - del_flag = False - if ins_len > 0: - last_seq += base - last_seq_code = 'i' - ins_len -= 1 - continue - if del_len > 0: - last_seq += base - last_seq_code = 'd' - del_len -= 1 - continue - if carrot_flag: - carrot_flag = False - continue - if base == '.' or base == ','\ - or base == 'A' or base == 'a'\ - or base == 'C' or base == 'c'\ - or base == 'G' or base == 'g'\ - or base == 'T' or base == 't'\ - or base == 'N' or base == 'n'\ - or base == '>' or base == '<'\ - or base == '*' or base == '#': - if first_iter: - first_iter = False - else: - if last_seq_code == 'i': - if last_seq.upper() == alt_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - elif last_seq_code == 'd': - if last_seq.upper() == ref_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - elif last_seq_code == 'b': - if last_seq.upper() == ref_base.upper(): - counts[0] += 1 - if forward_flag: - counts[1] += 1 - else: - counts[2] += 1 - elif last_seq.upper() == alt_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - if base == '.': - last_seq = ref_base2 - forward_flag = True - last_seq_code = 'b' - elif base == ',': - last_seq = ref_base2 - forward_flag = False - last_seq_code = 'b' - elif base == '>' or base == '<' or base == '*' or base == '#': - continue - else: - forward_flag = base.isupper() - last_seq = base.upper() - last_seq_code = 'b' - elif base == '+': - ins_flag = True - ins_str = "" - elif base == '-': - del_flag = True - del_str = "" - elif base == '^': - carrot_flag = True - elif base == '$': - continue - if first_iter: - first_iter = False - - if last_seq_code == 'i': - if last_seq.upper() == alt_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - elif last_seq_code == 'd': - if last_seq.upper() == ref_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - elif last_seq_code == 'b': - if last_seq.upper() == ref_base.upper(): - counts[0] += 1 - if forward_flag: - counts[1] += 1 - else: - counts[2] += 1 - elif last_seq.upper() == alt_base.upper(): - counts[3] += 1 - if forward_flag: - counts[4] += 1 - else: - counts[5] += 1 - else: - counts[6] += 1 - if forward_flag: - counts[7] += 1 - else: - counts[8] += 1 - dp = int(fields[3]) - if sum([counts[0], counts[3], counts[6]]) == 0: - af = float("nan") - else: - af = float(counts[3]) / float(sum([counts[0], counts[3], counts[6]])) - if sum([counts[1], counts[4], counts[7]]) == 0: - faf = float("nan") - else: - faf = float(counts[4]) / float(sum([counts[1], counts[4], counts[7]])) - if sum([counts[2], counts[5], counts[8]]) == 0: - raf = float("nan") - else: - raf = float(counts[5]) / float(sum([counts[2], counts[5], counts[8]])) - dp4 = [counts[1], counts[2], counts[4], counts[5]] - return (dp, af, faf, raf, dp4) - - -def annotateVCF(in_vcf_filepath, in_mpileup_filepath, out_vcf_filepath): - in_vcf = open(in_vcf_filepath, 'r') - in_mpileup = open(in_mpileup_filepath, 'r') - out_vcf = open(out_vcf_filepath, 'w') - - # First pass parsing of input vcf, output headerlines + new headerlines, add VCF sites we care about to to_examine (limits memory usage for sites that don't need annotation) - to_examine = {} - for line in in_vcf: - if line[0:2] == "##": - out_vcf.write(line) - elif line[0] == "#": - out_vcf.write("##annotateVCFVersion=0.2\n") - out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw Depth\">\n") - out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n") - out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\"Forward Allele Frequency\">\n") - out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\"Reverse Allele Frequency\">\n") - out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\"Phred-scaled strand bias at this position\">\n") - out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n") - out_vcf.write(line) - else: - fields = line.strip().split() - if fields[0] in to_examine: - to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4]) - else: - to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])} - in_vcf.close() - data = {} - - # Populate data dictionary, which relates chromosome and position to the following: - # depth of coverage - # allele frequency - # forward strand allele frequency - # reverse strand allele frequency - # dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand - for line in in_mpileup: - fields = line.strip().split() - if fields[0] not in to_examine: - continue - if int(fields[1]) not in to_examine[fields[0]]: - continue - (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])] - if len(ref_base.split(',')) > 1: # Can't handle multiple ref alleles - continue - if len(alt_base.split(',')) > 1: # Can't handle multiple alt alleles - continue - if len(ref_base) > 1 or len(alt_base) > 1: - if len(ref_base) > 1 and len(alt_base) > 1: # Can't handle complex indels - continue - data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base) - if len(ref_base) == 1 and len(alt_base) == 1: - data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base) - in_mpileup.close() - # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary - # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result) - in_vcf = open(in_vcf_filepath, 'r') - for line in in_vcf: - if line[0] == '#': - continue - fields = line.strip().split('\t') - if (fields[0], int(fields[1])) not in data: - continue - (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))] - dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]] - _, p_val = fisher_exact(dp2x2) - sb = pval_to_phredqual(p_val) - if fields[7] == "": - info = [] - else: - info = fields[7].split(';') - info.append("DP=%d" % (dp)) - if isnan(af): - info.append("AF=NaN") - else: - info.append("AF=%.6f" % (af)) - if isnan(faf): - info.append("FAF=NaN") - else: - info.append("FAF=%.6f" % (faf)) - if isnan(raf): - info.append("RAF=NaN") - else: - info.append("RAF=%.6f" % (raf)) - info.append("SB=%d" % (sb)) - info.append("DP4=%s" % (','.join([str(x) for x in dp4]))) - new_info = ';'.join(info) - fields[7] = new_info - out_vcf.write("%s\n" % ("\t".join(fields))) - in_vcf.close() - out_vcf.close() - - -if __name__ == "__main__": - annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])
--- a/convert_VCF_info_fields.py Mon Mar 29 20:06:01 2021 +0000 +++ b/convert_VCF_info_fields.py Thu Apr 22 20:24:49 2021 +0000 @@ -11,7 +11,8 @@ from collections import OrderedDict from math import log10 -from scipy.stats import fisher_exact +import scipy +import scipy.stats def pval_to_phredqual(pval): @@ -69,7 +70,7 @@ for j, i in enumerate(range(2, len(sr_list), 2)): dp4 = (sr_list[ref_fwd], sr_list[ref_rev], sr_list[i], sr_list[i + 1]) dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]] - _, p_val = fisher_exact(dp2x2) + _, p_val = scipy.stats.fisher_exact(dp2x2) sb = pval_to_phredqual(p_val) as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])
--- a/macros.xml Mon Mar 29 20:06:01 2021 +0000 +++ b/macros.xml Thu Apr 22 20:24:49 2021 +0000 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">1.0.3</token> - <token name="@PROFILE@">18.01</token> + <token name="@TOOL_VERSION@">1.3.2</token> + <token name="@PROFILE@">20.01</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">medaka</requirement> @@ -48,29 +48,32 @@ </xml> <xml name="model" token_argument="-m" token_label="Select model"> <param argument="@ARGUMENT@" type="select" label="@LABEL@"> - <option value="r10_min_high_g303">r10_min_high_g303</option> - <option value="r10_min_high_g340">r10_min_high_g340</option> <option value="r103_min_high_g345">r103_min_high_g345</option> <option value="r103_min_high_g360">r103_min_high_g360</option> <option value="r103_prom_high_g360">r103_prom_high_g360</option> <option value="r103_prom_snp_g3210">r103_prom_snp_g3210</option> <option value="r103_prom_variant_g3210">r103_prom_variant_g3210</option> + <option value="r10_min_high_g303">r10_min_high_g303</option> + <option value="r10_min_high_g340">r10_min_high_g340</option> <option value="r941_min_fast_g303">r941_min_fast_g303</option> <option value="r941_min_high_g303">r941_min_high_g303</option> <option value="r941_min_high_g330">r941_min_high_g330</option> <option value="r941_min_high_g340_rle">r941_min_high_g340_rle</option> <option value="r941_min_high_g344">r941_min_high_g344</option> <option value="r941_min_high_g351">r941_min_high_g351</option> - <option value="r941_min_high_g360">r941_min_high_g360</option> + <option value="r941_min_high_g360" selected="true">r941_min_high_g360</option> <option value="r941_prom_fast_g303">r941_prom_fast_g303</option> <option value="r941_prom_high_g303">r941_prom_high_g303</option> <option value="r941_prom_high_g330">r941_prom_high_g330</option> <option value="r941_prom_high_g344">r941_prom_high_g344</option> - <option value="r941_prom_high_g360" selected="true">r941_prom_high_g360</option> + <option value="r941_prom_high_g360">r941_prom_high_g360</option> + <option value="r941_prom_high_g4011">r941_prom_high_g4011</option> <option value="r941_prom_snp_g303">r941_prom_snp_g303</option> <option value="r941_prom_snp_g322">r941_prom_snp_g322</option> + <option value="r941_prom_snp_g360">r941_prom_snp_g360</option> <option value="r941_prom_variant_g303">r941_prom_variant_g303</option> <option value="r941_prom_variant_g322">r941_prom_variant_g322</option> + <option value="r941_prom_variant_g360">r941_prom_variant_g360</option> </param> </xml> <xml name="reference">
--- a/medaka_variant.xml Mon Mar 29 20:06:01 2021 +0000 +++ b/medaka_variant.xml Thu Apr 22 20:24:49 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@"> +<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> <description>via neural networks</description> <macros> <import>macros.xml</import> @@ -41,8 +41,8 @@ <expand macro="model" argument="-s" label="Select model for initial SNP calling from mixed reads prior to phasing"/> <expand macro="model" argument="-m" label="Select model for final variant calling from phased reads"/> <expand macro="b"/> - <param argument="-N" type="integer" value="14" label="Set threshold for filtering indels in final VCF"/> - <param argument="-P" type="integer" value="12" label="Set threshold for filtering SNPs in final VCF"/> + <param argument="-N" type="integer" value="9" label="Set threshold for filtering indels in final VCF"/> + <param argument="-P" type="integer" value="8" label="Set threshold for filtering SNPs in final VCF"/> <param argument="-U" type="boolean" truevalue="-U" falsevalue="" label="Avoid filtering of final VCF?"/> <param argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Stop after initial SNP calling from mixed reads prior to phasing?"/> <param name="out" type="select" multiple="true" optional="false" label="Select out file(s)5"> @@ -105,21 +105,21 @@ <tests> <!-- #1 default --> <test> - <param name="i" value="alignment.bam"/> + <param name="i" value="medaka_test.bam"/> <conditional name="reference_source"> <param name="reference_source_selector" value="cached"/> - <param name="ref_file" value="bwa-mem-mt-genome"/> + <param name="ref_file" value="ref_fasta"/> </conditional> <param name="out" value="round_0_hap_mixed_probs.hdf,round_0_hap_mixed_unphased.vcf,log"/> <output name="out_round_0_hap_mixed_unphased_vcf"> <assert_contents> - <has_n_lines n="6"/> + <has_n_lines n="7"/> <has_line line="##fileformat=VCFv4.1"/> </assert_contents> </output> <output name="out_round_0_hap_mixed_probs_hdf"> <assert_contents> - <has_size value="32624"/> + <has_size value="108753" delta="100"/> </assert_contents> </output> <output name="out_log">
--- a/test-data/all_fasta.loc Mon Mar 29 20:06:01 2021 +0000 +++ b/test-data/all_fasta.loc Thu Apr 22 20:24:49 2021 +0000 @@ -1,1 +1,1 @@ -bwa-mem-mt-genome bwa-mem-mt-genome bwa-mem-mt-genome ${__HERE__}/bwa-mem-mt-genome.fa \ No newline at end of file +ref_fasta ref_fasta ref_fasta ${__HERE__}/ref.fasta \ No newline at end of file
--- a/test-data/bwa-mem-mt-genome.fa Mon Mar 29 20:06:01 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,238 +0,0 @@ ->gi|251831106|ref|NC_012920.1| -GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG -GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC -CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA -ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC -ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA -AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC -TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA -CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC -AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC -ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA -GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC -AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA -ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA -TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT -CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC -ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC -AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC -CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA -CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC -AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT -GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA -AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA -TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA -GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA -GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG -ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA -ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC -AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA -AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT -AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC -CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG -TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC -ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG -TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC -AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA -AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC -ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA -AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT -TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA -TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT -AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG -TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA -GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG -ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG -AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT -ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA -ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT -TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA -TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC -CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC -ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC -CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG -CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC -AGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC -TCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTGG -CCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTTCGACCTTGCCGAAGGGGAGTC -CGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATAC -ACAAACATTATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCC -CTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAAC -AGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTA -GCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATAT -GTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATTTCTAGGACTATGA -GAATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTC -AGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCT -GGCCCAACCCGTCATCTACTCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTT -TTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTC -GTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTAT -CCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACTCATCATTAATA -ATCATAATAGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAG -GCACCCCTCTGACATCCGGCCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCA -AATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGA -GGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAA -TAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTAC -CGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTA -ACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTT -TGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCAC -CATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATA -TCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCG -CCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATAATCTTATAGAAATTTAGGTTAAATAC -AGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGTAACAGCTAAGGACTGCAAAA -CCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGA -CTTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCC -GCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGA -AAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCA -GCCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGG -AACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCC -GAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCAT -TTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAAT -AATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTC -CTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAG -CAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCTAGCAGGTGTCTC -CTCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATAACCCAATAC -CAAACGCCCCTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCTCCCAGTCCTAG -CTGCTGGCATCACTATACTACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGG -AGACCCCATTCTATACCAACACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCA -GGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTA -TGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGG -AATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAA -GTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAG -GATTCATCTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGT -ACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATC -ATAGGAGGCTTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCC -ATTTCACTATCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGCCTATCCGGAAT -GCCCCGACGTTACTCGGACTACCCCGATGCATACACCACATGAAACATCCTATCATCTGTAGGCTCATTC -ATTTCTCTAACAGCAGTAATATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTCC -TAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGA -AGAACCCGTATACATAAAATCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGTTTCAAGCCAA -CCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAAAGTTAAAT -TATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCT -ATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCC -TGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGA -AACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAC -ATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACG -AGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGA -CCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACA -TCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTC -TAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGC -AAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTT -ACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTT -AAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCAT -AATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAAACTACCACCTA -CCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCT -GTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTCTATTTCCCCCT -CTATTGATCCCCACCTCCAAATATCTCATCAACAACCGACTAATCACCACCCAACAATGACTAATCAAAC -TAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTT -AATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTA -TCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTA -AAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTATTATCGA -AACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTAACATTACTGCAGGCCAC -CTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCA -TCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCAC -ACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAA -AACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTAGCCATGTGATT -TCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACTAACCAACACACTAACCATATACCAATGATGG -CGCGATGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACG -GGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAG -CCTAGCCCCTACCCCCCAATTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAA -GTCCCACTCCTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTAA -TAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCT -CCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTTT -GTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCCTCACTATCTGCTTCATCCGCC -AACTAATATTTCACTTTACATCCAAACATCACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGT -AGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGT -ACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAA -TAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACAT -AGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAA -TTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAG -CCCTACAAACAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAG -TCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAAT -GATTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTAG -CATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTACTATGCCTAGA -AGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAAT -ATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCTACTAGTCTCAA -TCTCCAACACATATGGCCTAGACTACGTACATAACCTAAACCTACTCCAATGCTAAAACTAATCGTCCCA -ACAATTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACAACCACCCACA -GCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCC -AACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATC -ATGGCAAGCCAACGCCACTTATCCAGTGAACCACTATCACGAAAAAAACTCTACCTCTCTATACTAATCT -CCCTACAAATCTCCTTAATTATAACATTCACAGCCACAGAACTAATCATATTTTATATCTTCTTCGAAAC -CACACTTATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACA -TACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACAACACCC -TAGGCTCACTAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTT -AATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACTTATGACTCCCT -AAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCGCAGTACTCTTAAAACTAGGCGGCT -ATGGTATAATACGCCTCACACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACT -ATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCGCTCATTGCA -TACTCTTCAATCAGCCACATAGCCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCG -GCGCAGTCATTCTCATAATCGCCCACGGGCTTACATCCTCATTACTATTCTGCCTAGCAAACTCAAACTA -CGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTT -TGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCTG -TGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGGACTCAACATACTAGTCACAGC -CCTATACTCCCTCTACATATTTACCACAACACAATGGGGCTCACTCACCCACCACATTAACAACATAAAA -CCCTCATTCACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCTATCCCTCAACC -CCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACAA -CAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAAC -AACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCA -ACTCCAAATAAAAGTAATAACCATGCACACTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCC -ATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCAT -CCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAA -CTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA -TTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATATAAACTCAGACC -CAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA -CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA -TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT -TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA -CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT -CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA -TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC -TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC -AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC -CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC -CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC -ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA -AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC -TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC -ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC -TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA -CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC -AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA -TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT -AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA -ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA -TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA -CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA -CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC -TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC -CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC -CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCG -ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC -CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC -CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC -CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA -ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA -CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC -ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC -TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC -AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC -TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA -GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT -CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC -ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT -TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC -TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA -GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC -AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT -AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC -TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC -CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA -GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC -ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA -TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA -ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA -ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG -TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC -TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG -CTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC -ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG