# HG changeset patch # User portiahollyoak # Date 1461341354 14400 # Node ID bcdd1a35e545010cd058a4f99fe85452e6e5d683 planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty diff -r 000000000000 -r bcdd1a35e545 genbank_to_fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genbank_to_fasta.py Fri Apr 22 12:09:14 2016 -0400 @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# coding: utf-8 + +import argparse +import doctest # This will test if the functions are working + + +def get_id(line): + """ + This function reads a line and returns the ID name + + >>> line = 'ID TE standard; DNA; INV; 7411 BP.' + >>> 'TE'== get_id(line) + True + + """ + if line.startswith("ID"): + id = line.split(" ")[1] #split line into 'ID' and rest of line, take rest of line and define as id + id = id.split(" ")[0] #split id into 'ID name' and rest of line, take ID name and define as id + return id + + +def get_seq(line): + """ + This function reads a sequence line from a genbank file + and returns a sequence with no spaces or digits + + >>> line = "AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA 60" + >>> 'AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATA' == get_seq(line) + True + + """ + seq = [] + for char in line: + if not char.isdigit() and not char == " ": # If a character is not a digit or space, + # it will be added to sequence. + seq.append(char) + seq = "".join(seq) + return seq + + +def make_seq_dictionary(input_file_handle): + """ + This function loops over a multi genbank file and returns + a collection of ID and corresponding sequence in a dictionary. + """ + seq_d = {} # dictionary with id as key and sequence as value + next_line_is_seq = False + for line in input_file_handle: + line = line.strip() # strips any leading or trailing whitespace + if line.startswith("ID"): + id = get_id(line) + seq_d[id]="" # We just create a new key + if line.startswith("SQ"): + next_line_is_seq = True # If line starts with 'SQ' then state is true + continue + if line.startswith("//"): # If line starts with '//' then state is false + next_line_is_seq = False + if next_line_is_seq: # Whatever has been read as true, this is copied to file + seq = get_seq(line) + seq_d[id] += seq + return seq_d + + +def write_seq_d_to_file(seq_d, output): + """ + This function will write the sequence dictionary to an output file + """ + for transposon, seq in seq_d.items(): + output.write(">%s\n" % transposon) + output.write("%s\n" % seq) + +description = ( "This script will extract ID names and sequences from a multigenbank" + "file and format them into a multifasta file." ) + + +parser = argparse.ArgumentParser(description) +parser.add_argument("input", help="A multi-genbank file.") +parser.add_argument("output", help="Name of the output fasta file.") +args = parser.parse_args() + +try: + with open(args.input, encoding = "utf-8") as input_file_handle: + # This will perform the tasks + seq_d = make_seq_dictionary(input_file_handle) +except TypeError: + with open(args.input) as input_file_handle: + seq_d = make_seq_dictionary(input_file_handle) + +with open(args.output, "w") as output: + write_seq_d_to_file(seq_d, output) + diff -r 000000000000 -r bcdd1a35e545 genbank_to_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genbank_to_fasta.xml Fri Apr 22 12:09:14 2016 -0400 @@ -0,0 +1,58 @@ + + + + + + python $__tool_directory__/genbank_to_fasta.py "$input" "$output" + + + + + + + + + + + + + + +This tool converts a multigenbank file into a multifasta file. + +----- + +**Example multi genbank file** :: + + ID DME9736 standard; DNA; INV; 7411 BP. + XX + CC Derived from AJ009736 (e1371475) (Rel. 58, Last updated, Version 1). + CC Takis Benos and Michael Ashburner, 1-Feb-1999. + CC Any changes to original sequence record are annotated in an FT line. + XX + SQ Sequence 7411 BP; 3047 A; 1363 C; 1109 G; 1892 T; 0 other; + GTGACATATC CATAAGTCCC TAAGACTTAA GCATATGCCT ACATACTAAT ACACTTACAA 60 + CACATACACC CCAATACAAC ATACACTACT CCGGATGTAC CCAACAGATA CCAGATAAGA 120 + ATAAGATTGT TATATGATCC TCGAGAATGG AAAAAACCCC AATTCTAGAT AAGTCACCCA 180 + CTGGTAGACT AAACATCCGT CCCCTAATTT AAACAATTCC TTGCTTAAGC CTCACCCCAT 240 + // + ID DMIS176 standard; DNA; INV; 7439 BP. + XX + CC Derived from X01472 (g8142) (Rel. 36, Last updated, Version 2). + CC Takis Benos and Michael Ashburner, 20-Aug-1997. + CC Any changes to original sequence record are annotated in an FT line. + XX + SQ Sequence 7439 BP; 2985 A; 1512 C; 1048 G; 1894 T; 0 other; + AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA 60 + CGTAAACAAT AAGTGACCAC CATGCTAATG TGGATCAAAT AACAAAAATA TCCACTCTGC 120 + // + +**Example output fasta file** :: + + >DME9736 + GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA.... + >DMIS176 + AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATA + + + diff -r 000000000000 -r bcdd1a35e545 test-data/genbank_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genbank_input.txt Fri Apr 22 12:09:14 2016 -0400 @@ -0,0 +1,388 @@ +ID DME9736 standard; DNA; INV; 7411 BP. +XX +AC AJ009736; +XX +DR FLYBASE; FBgn0026065; Idefix. +XX +FT source AJ009736:1..7411 +FT SO_feature five_prime_LTR ; SO:0000425:1..600 +FT SO_feature three_prime_LTR ; SO:0000426:6841..7411 +FT SO_feature CDS ; SO:0000316:<988..2031 +FT /name="Idefix\gag" +FT /db_xref="FLYBASE:FBgn0027381" +FT /db_xref="SPTREMBL:O96739" +FT /db_xref="NCBI_PROTEIN:CAA08806.1" +FT /translation="ARKLKDIMAVPQLSETHLNQLLNQIKELNYYDGAPGKLSGFVNQV +FT EQLLSLYPTQEARQAHVIYGAVKRLLVDSALEVVTQERANTWLDMKKALAMAFKDHRPY +FT VTLIRQLEDISYPGSICKFIEKLETQYWIMFDKLELESDHVDKSNYTEMLNKTVKSVID +FT RKLPDRIYMSLARKDIDTIYKLKQASMELGLYDAIPENHRSNRTEMNKRRNRGNYNQNN +FT NQKYYNNRNHNYSNYYPSMNQNHNTQPPQNPTQPMTNQNQYSPRFIPNNQRGNYYAFRR +FT DLTQAQQNNPLNNTLNFQPSTSNNINRQGPVKRQRESQSDQSRMDVNFHQAASDTQMIE +FT KDIQVPM" +FT SO_feature CDS ; SO:0000316:<1950..5402 +FT /name="Idefix\pol" +FT /db_xref="FLYBASE:FBgn0027380" +FT /db_xref="SPTREMBL:O96740" +FT /db_xref="NCBI_PROTEIN:CAA08807.1" +FT /translation="PKQDGCKFSSSCLGHSNDREGHTSPYVKIIHHNKNYKGMIDTGSS +FT INIIRENFENLEEKEENLIVYTIKGPITLKRSIIIKPTSVCPSAQKFYIHKFSDNYDFL +FT LGRKYLEDTKAKIDYANETVTLGSKVFKFLYEEKKGETASKCLDPQEKNDSALVDRTKP +FT KMQKVKTAPKCLKPKHQQQKKETALPKCLISNVVKDTVDNDVTHLDPMSVDNDIVNFAI +FT NNELRECNEYRLEHLNAEEVECLKKFLYEYRDIQYKEGENLTFTSTIKHVIQTQHEDPV +FT YRKPYKYPQSVDQEVNKQIKEMIEQGIVRKSKSPYCSPIWVVPKKADASGKQKFRLVVD +FT YRNLNEITVNDKFPIPRMDEILDKLGRCQYFTTIDLAKGFHQIQMDENSIAKTAFSTKH +FT GHYEYTRMPFGLKNAPATFQRCMNNLLEDLIYKDCLVYLDDIIVYSTPLEEHILSLKKV +FT FEKLRDANLKLQLDKCEFMKKETEFLGHIVTTNGIKPNPNKTKAITNFPLPKTPKQIKS +FT FLGLCGFYRKFIPNFAKIVKPMTLKLKKGAIIDTKCKEYIESFEKLKVLITSDPILIYP +FT DFSKPFSLTTDASNVAIGAVLSQNHKPVCYASRTLNEHEINYATIEKELLAIVWATKYF +FT RSYLFGRPFEVLSDHKPLVWLNNIKEPNMKLQRWKIKLNEFDYKIKYLPGKENHVADAL +FT SRTKIEVMVGEVANSADATIHSAIEDNLNYIPITERPINYFSRQIEIEKGDNDTTSVQH +FT LFQKLKIKIVYKEMTPELAKNLIKEYVCTKKSAIYFPNDEDFLIFQRAFTEIISPNNFT +FT KLLRCTTKLIDILTYAEFKDLILKKHKELLHPGIEKTINLFKEEYYYPDSQKLIQTIIN +FT ECQICYLAKTEHQTQMTYETTPEIFNTREKYMIDFYLTGNQIFLSCIDIYSKFASLVEL +FT KSRDWLEAKRAITKIFNDMGKPQEIKADKDSAFMCLALQNWLRSEGVQISISTSKNGIS +FT DIERFHKTVNEKLRIIGSQQNVEDRCTKFERILYIYNHKTKHNSTKRFPADIFLYAGSP +FT DFNVQQNKIDRIEYLNKNRHDFEVDIKYRQAPLVKSKITNPFKKTGRIGQVDDKHFEET +FT NRGRKIVHYKSKFKKQKKFNKSKYDNSRPTKEAQSTQHTSNNA" +FT SO_feature CDS ; SO:0000316:5248..6780 +FT /name="Idefix\env" +FT /db_xref="FLYBASE:FBgn0027382" +FT /db_xref="SPTREMBL:O96741" +FT /db_xref="NCBI_PROTEIN:CAA08808.1" +FT /translation="MINISKKQIVAGRSFTISQNLRNRKSLIRANMIIPDQPKKHKVHN +FT ILLIMLSCILSLIITVKCNNIEVNPVNAKNGYLIFQTGTMEIPTSYEYHYLSINITKTM +FT LMFEDIVSEANNYPNVPQIQYLVDKLKREINGLRIISRSKRGLLNVVGKAYKYLFGTLD +FT EDDREELEEKINNMSEDSVKTHDLNTILDVINSGIDIINKLKVDKEQHQQIAVLIFNLE +FT QFTEYIEDIELGLQLTRLGIFNPRLLKHDYLKHVNSEKMLKIKTSTWLKTDTNEILIIS +FT HIPSEVTKVPIFQIVPYPDEHNYILTEQIFDKFYIFDNQVFHKDTNRDIFDKCIIGIIK +FT QEQTQCKYIKTHKNYQINYIEPNILLTWNIPETAVNQDCTHNKILISGNNIIKIKNCTI +FT QIDEFLISNNLADFTQTIYITNNVTRLEPINHLQTREMIETHVKHYNFFQIICITTFVI +FT MIISLTLYVAYKFKNIPKKIIVNIVSKKNTRTLKIMSMKIFNKEIILPYTQI" +XX +CC Derived from AJ009736 (e1371475) (Rel. 58, Last updated, Version 1). +CC Takis Benos and Michael Ashburner, 1-Feb-1999. +CC Any changes to original sequence record are annotated in an FT line. +XX +SQ Sequence 7411 BP; 3047 A; 1363 C; 1109 G; 1892 T; 0 other; + GTGACATATC CATAAGTCCC TAAGACTTAA GCATATGCCT ACATACTAAT ACACTTACAA 60 + CACATACACC CCAATACAAC ATACACTACT CCGGATGTAC CCAACAGATA CCAGATAAGA 120 + ATAAGATTGT TATATGATCC TCGAGAATGG AAAAAACCCC AATTCTAGAT AAGTCACCCA 180 + CTGGTAGACT AAACATCCGT CCCCTAATTT AAACAATTCC TTGCTTAAGC CTCACCCCAT 240 + CGTCACATTC CCACGTTCAA AGCTCGGAGC CGCAATCCCG AAAAACAAAA GTATCGATTT 300 + CAATAAACAA ATTATAAGAA TCTAAGAGCA CTTGTATCCA AGAGCAAATG CACTTGAATC 360 + CAAGAGAAAC GCAAAGCTTT TTCTCTTTAC GATCAGAATC CTAAAGTCTA AAGTCCATAT 420 + TAGAAAAGCT CGATACCGAG GCTTGAACGT CAACCAAATC AGAATAATTA TCAGAGTTCA 480 + GTTTGAGACC TAATTGTAAA AGGTTCGGTG TTCTTCTCAA ATAAAAAGAT TGTAATCATT 540 + TAGTGAAATA AAAATTATAT TTTTTTCACT TATAAATATT GCAAGTATTT AATTGGCGCA 600 + GTCGGTTAGG ATCCAATAAA ATAAAAGAGT CCTTTTAGTA CGGTACTGAT CAACTGAAGG 660 + ATATGCTATA CGACTAGCTA TCCAAGATCA GCGAATTAAA ATAGTGATTC AAAAATATTT 720 + TTTAATCCGC AAAAGAATCT ACGTGAAAGT AGTATTCAAA ATAAAATCCC GTGCGGTCGG 780 + AAACAAAAAT TAATTTAAAT TTTTTAATTC CGAAACTTAA AACCAAGTTT AAAGAAAACT 840 + TAAAATCAAG AAAACTTAAA ACCAAGTTTA AAGAAAACTT AAAATCAAGA AAACTTAAAA 900 + CCAAGTTTAA AGAAAACTTA AAATCAAGAA AACTTAAAAC CAAGTTTAAA GAAAACTCAA 960 + AATCAAGAAA ACTTAAAGCC AAAATAAGCT AGAAAACTAA AAGACATCAT GGCAGTCCCA 1020 + CAACTCTCAG AAACACACCT AAACCAACTG CTAAACCAAA TCAAAGAATT AAACTACTAC 1080 + GATGGCGCAC CTGGCAAATT ATCTGGATTC GTCAACCAAG TGGAACAACT GCTCAGTTTA 1140 + TACCCAACAC AGGAAGCAAG ACAGGCACAC GTCATATATG GAGCAGTGAA GCGGTTATTA 1200 + GTGGATTCAG CCTTAGAAGT CGTAACCCAG GAAAGAGCTA ACACATGGCT GGACATGAAG 1260 + AAAGCACTGG CAATGGCATT CAAAGACCAT AGACCTTATG TAACTCTCAT CAGACAATTA 1320 + GAAGACATAT CATACCCAGG AAGTATCTGT AAGTTTATAG AAAAATTAGA AACACAATAC 1380 + TGGATTATGT TCGATAAGTT AGAATTAGAA AGTGACCATG TTGATAAATC GAATTATACC 1440 + GAAATGTTAA ACAAAACTGT TAAATCAGTA ATAGATCGAA AACTGCCGGA TAGAATTTAT 1500 + ATGTCTTTGG CACGTAAAGA TATTGATACA ATTTATAAAT TAAAACAAGC ATCAATGGAA 1560 + TTAGGCCTTT ATGATGCTAT TCCAGAAAAT CACCGTTCTA ATAGAACAGA AATGAATAAA 1620 + CGTAGGAACA GGGGAAACTA TAATCAAAAT AATAATCAAA AATATTACAA TAATAGAAAT 1680 + CACAACTACA GTAATTATTA TCCTAGCATG AATCAGAATC ATAATACACA ACCACCTCAG 1740 + AATCCGACTC AACCTATGAC AAATCAAAAC CAATATTCAC CGCGTTTCAT ACCGAATAAT 1800 + CAAAGAGGGA ATTATTATGC ATTTAGACGA GACTTAACAC AAGCTCAGCA GAACAACCCA 1860 + CTTAATAACA CCCTTAACTT CCAACCTTCG ACATCGAATA ATATTAACAG ACAAGGGCCA 1920 + GTAAAAAGAC AACGCGAGAG TCAGAGTGAC CAAAGCAGGA TGGATGTAAA TTTTCATCAA 1980 + GCTGCCTCGG ACACTCAAAT GATAGAGAAG GACATACAAG TCCCTATGTA AAAATAATTC 2040 + ATCATAATAA AAATTATAAG GGAATGATCG ATACAGGATC ATCAATTAAC ATCATAAGAG 2100 + AAAATTTTGA GAACTTAGAA GAAAAGGAAG AAAACCTAAT AGTATACACT ATTAAAGGAC 2160 + CAATAACACT AAAGAGAAGT ATAATAATAA AACCTACTTC AGTATGTCCG TCTGCTCAAA 2220 + AATTCTACAT TCACAAATTT TCTGATAACT ATGATTTCTT GTTAGGTCGA AAGTATTTAG 2280 + AAGATACAAA AGCTAAAATA GATTATGCTA ACGAAACAGT AACACTAGGC TCAAAAGTAT 2340 + TTAAGTTTCT CTATGAAGAA AAGAAGGGCG AGACCGCATC CAAATGCCTT GACCCACAAG 2400 + AAAAGAATGA TTCCGCTCTA GTGGACAGAA CCAAACCAAA AATGCAAAAG GTTAAGACCG 2460 + CACCTAAGTG CCTTAAACCA AAGCATCAAC AGCAGAAGAA AGAGACCGCA TTACCCAAAT 2520 + GCCTCATTTC AAATGTTGTT AAAGACACAG TGGACAATGA TGTAACACAT CTCGATCCCA 2580 + TGTCCGTTGA CAACGATATA GTCAACTTCG CGATTAACAA TGAGTTACGC GAATGTAACG 2640 + AGTATAGACT CGAACACTTA AATGCAGAGG AAGTTGAATG TTTAAAGAAG TTCCTATACG 2700 + AATATAGAGA CATTCAGTAC AAAGAGGGCG AAAATTTGAC CTTCACCAGT ACTATTAAAC 2760 + ATGTCATCCA GACTCAACAC GAAGACCCAG TATACCGTAA ACCCTACAAG TACCCTCAAA 2820 + GCGTTGACCA AGAAGTTAAC AAACAAATTA AAGAAATGAT AGAACAAGGG ATTGTTCGCA 2880 + AATCGAAGTC CCCTTATTGT TCTCCTATTT GGGTGGTCCC CAAGAAGGCA GACGCCTCTG 2940 + GGAAACAAAA ATTCAGGTTG GTAGTCGATT ACAGGAACCT AAATGAGATA ACTGTTAACG 3000 + ACAAATTTCC CATTCCCCGA ATGGATGAGA TATTGGACAA ACTAGGTAGA TGCCAATACT 3060 + TTACCACTAT AGATCTAGCC AAGGGTTTTC ACCAAATCCA AATGGATGAA AATTCTATTG 3120 + CAAAAACAGC TTTTTCAACT AAGCATGGGC ATTATGAATA TACTCGTATG CCCTTTGGTT 3180 + TAAAAAACGC TCCAGCTACT TTTCAGAGAT GCATGAATAA TCTTCTGGAA GATTTAATCT 3240 + ACAAAGACTG TTTAGTCTAT TTAGACGATA TTATTGTTTA TTCCACTCCA TTGGAAGAAC 3300 + ACATTTTATC CCTAAAGAAA GTCTTTGAAA AACTGAGAGA CGCTAATTTA AAGTTGCAAC 3360 + TAGATAAATG TGAATTCATG AAGAAAGAAA CTGAATTCCT AGGACACATC GTCACAACAA 3420 + ATGGCATCAA ACCAAATCCA AATAAAACTA AAGCAATTAC AAATTTTCCA TTACCCAAGA 3480 + CACCTAAGCA AATAAAATCA TTTTTGGGAT TATGTGGATT CTATCGCAAG TTTATTCCTA 3540 + ACTTTGCCAA AATAGTTAAA CCCATGACCC TCAAATTAAA GAAAGGTGCT ATAATAGACA 3600 + CCAAATGTAA AGAATACATC GAATCATTTG AAAAATTAAA AGTTTTGATA ACTTCAGACC 3660 + CGATATTAAT CTATCCTGAT TTTTCAAAAC CTTTTTCTTT GACAACTGAT GCTAGCAACG 3720 + TAGCTATTGG TGCAGTGTTA TCACAAAATC ACAAGCCAGT TTGTTATGCC AGTAGAACGC 3780 + TAAACGAACA TGAAATCAAC TATGCTACGA TTGAAAAAGA ATTGTTAGCT ATAGTTTGGG 3840 + CTACAAAATA TTTCAGGTCA TACTTATTCG GCAGACCATT TGAAGTATTA AGTGATCACA 3900 + AGCCACTGGT ATGGCTCAAC AACATTAAAG AACCAAACAT GAAATTGCAA AGATGGAAAA 3960 + TAAAACTTAA TGAATTCGAT TATAAAATCA AATATCTTCC AGGCAAAGAA AACCATGTCG 4020 + CGGATGCTCT TTCCCGCACG AAAATAGAAG TTATGGTTGG CGAGGTCGCA AATAGCGCAG 4080 + ACGCAACTAT ACACAGTGCC ATTGAAGATA ATCTAAATTA CATACCCATA ACAGAAAGAC 4140 + CAATAAATTA CTTCTCTAGA CAAATAGAGA TAGAAAAAGG CGATAACGAT ACAACAAGTG 4200 + TACAACATTT GTTTCAAAAA TTAAAGATTA AGATAGTCTA TAAAGAAATG ACACCTGAAC 4260 + TCGCCAAAAA CCTCATTAAG GAATATGTGT GCACCAAAAA GAGTGCAATT TATTTCCCTA 4320 + ATGACGAAGA TTTTCTGATC TTCCAGAGAG CGTTTACCGA AATTATAAGC CCTAACAATT 4380 + TCACAAAACT CTTGAGATGT ACCACAAAGT TAATTGATAT ACTAACGTAT GCAGAATTCA 4440 + AAGATTTAAT CTTAAAGAAA CATAAGGAAC TTTTACATCC GGGTATAGAA AAAACAATCA 4500 + ATTTATTTAA AGAAGAATAT TACTATCCTG ATAGTCAAAA GCTTATTCAA ACCATTATCA 4560 + ATGAATGTCA AATTTGTTAT CTAGCAAAAA CGGAACATCA AACACAAATG ACATATGAGA 4620 + CTACACCAGA AATATTTAAC ACAAGAGAAA AATACATGAT AGATTTTTAT CTCACAGGAA 4680 + ACCAGATCTT CTTATCTTGC ATTGATATCT ATTCGAAATT TGCATCACTA GTTGAATTAA 4740 + AAAGTAGAGA TTGGCTAGAA GCAAAAAGAG CCATTACTAA AATATTCAAT GACATGGGAA 4800 + AACCGCAAGA AATTAAAGCA GACAAAGACT CAGCTTTTAT GTGTTTAGCC TTACAAAATT 4860 + GGTTAAGATC TGAAGGTGTA CAAATTTCTA TAAGCACTAG CAAAAATGGT ATATCTGATA 4920 + TAGAAAGATT CCACAAGACC GTAAACGAAA AGCTAAGAAT CATTGGTAGC CAACAAAATG 4980 + TTGAAGATAG GTGCACAAAA TTCGAAAGAA TTCTATACAT ATACAATCAC AAAACTAAAC 5040 + ATAATAGTAC TAAAAGATTT CCAGCAGACA TTTTCCTATA TGCAGGCAGT CCAGATTTTA 5100 + ATGTACAACA AAACAAAATC GATAGGATAG AATACCTCAA TAAGAATAGA CACGATTTTG 5160 + AAGTTGATAT AAAATATAGA CAAGCCCCAC TTGTAAAAAG TAAAATAACC AATCCATTTA 5220 + AAAAGACAGG AAGAATTGGA CAAGTAGATG ATAAACATTT CGAAGAACAA AATCGTGGCA 5280 + GGAAGATCGT TCACTATAAG TCAAAATTTA AGAAACAGAA AAAGTTTAAT AAGAGCAAAT 5340 + ATGATAATTC CAGACCAACC AAAGAAGCAC AAAGTACACA ACATACTTCT AATAATGCTT 5400 + AGTTGCATAC TATCACTTAT CATCACGGTC AAGTGCAACA ATATAGAAGT AAATCCAGTA 5460 + AACGCGAAAA ATGGATACCT TATATTCCAA ACAGGAACAA TGGAAATTCC AACCAGCTAT 5520 + GAATACCATT ATTTAAGCAT AAACATAACA AAGACAATGC TCATGTTCGA AGATATAGTA 5580 + AGTGAAGCAA ACAACTATCC TAATGTACCA CAAATACAAT ATTTAGTCGA CAAATTAAAA 5640 + CGAGAAATAA ATGGGTTAAG AATTATTAGT CGAAGTAAAA GAGGTCTTTT AAACGTAGTA 5700 + GGAAAAGCAT ACAAATACTT ATTCGGCACA TTAGATGAGG ATGACAGAGA AGAGTTAGAA 5760 + GAAAAAATAA ACAACATGTC AGAAGACTCT GTAAAAACCC ATGACCTAAA CACGATTCTA 5820 + GATGTAATCA ATAGTGGTAT AGATATAATT AATAAGCTCA AAGTAGATAA AGAACAACAC 5880 + CAACAAATTG CGGTACTAAT ATTTAACCTA GAGCAATTTA CAGAATATAT AGAAGACATA 5940 + GAATTGGGTC TGCAATTAAC CAGACTAGGA ATTTTCAATC CAAGATTACT AAAGCATGAC 6000 + TATTTAAAAC ATGTAAATTC AGAAAAAATG CTAAAGATAA AAACGTCAAC CTGGCTTAAA 6060 + ACAGACACGA ACGAAATTTT GATTATTTCC CATATTCCTA GCGAAGTTAC TAAAGTTCCA 6120 + ATATTCCAAA TTGTTCCGTA CCCAGATGAA CATAATTATA TTCTAACCGA GCAAATATTC 6180 + GATAAATTCT ACATATTTGA TAACCAAGTA TTCCATAAAG ATACCAATAG GGATATATTC 6240 + GACAAATGTA TTATTGGAAT CATCAAACAA GAGCAAACTC AATGCAAATA TATTAAAACA 6300 + CATAAAAATT ACCAAATAAA TTATATAGAA CCAAATATAC TATTAACATG GAATATTCCT 6360 + GAAACAGCTG TTAACCAAGA CTGTACACAC AATAAAATAT TAATTTCAGG AAACAACATC 6420 + ATTAAAATTA AAAATTGTAC CATACAAATA GATGAATTCT TAATCTCTAA TAATCTAGCA 6480 + GACTTTACAC AAACAATTTA TATCACCAAC AATGTAACAC GTCTAGAACC AATAAATCAC 6540 + TTACAAACGA GAGAAATGAT AGAAACCCAT GTAAAACACT ATAACTTTTT TCAAATTATA 6600 + TGCATTACAA CGTTCGTCAT AATGATAATT AGTTTGACTC TGTATGTAGC ATATAAGTTT 6660 + AAAAATATAC CTAAGAAAAT TATTGTCAAT ATCGTAAGCA AAAAGAACAC ACGCACCTTG 6720 + AAAATAATGT CAATGAAAAT ATTCAACAAG GAAATAATAT TACCTTATAC CCAAATTTAA 6780 + CGACCTGAGG ACAGGCCAAA TTCAAAGGTT GGGGGAGTGA CATATCCATA AGTCCCTAAG 6840 + ACTTAAGCAT ATGCCTACAT ACTAATACAC TTACAACACA TACACCCCAA TACAACATAC 6900 + ACTACTCCGG ATGTACCCAA CAGATACCAG ATAAGAATAA GATTGTTATA TGATCCTCGA 6960 + GAATGGAAAA AACCCCAATT CTAGATAAGT CACCCACTGG TAGACTAAAC ATCCGTTCCC 7020 + CTAATTTAAA CAATTCCTTG CTTAAGCCTC ACCCCATCGT CACATTCCCA CGTTCAAAGC 7080 + TCGGAGCCGC AATCCCGAAA AACAAAAGTA TCGATTTCAA TAAACAAATT ATAAGAATCT 7140 + AAGAGCACTT GTATCCAAGA GCAAATGCAC TTGAATCCAA GAGAAACGCA AAGCTTTTTC 7200 + TCTTTACGAT CAGAATCCTA AAGTCTAAAG TCCATATTAG AAAAGCTCGA TACCGAGGCT 7260 + TGAACGTCAA CCAAATCAGA ATAATTATCA GAGTTCAGTT TGAGACCTAA TTGTAAAAGG 7320 + TTCGGTGTTC TTCTCAAATA AAAAGATTGT AATCATTTAG TGAAATAAAA ATTATATTTT 7380 + TTTCACTTAT AAATATTGCA AGTATTTAAT T 7411 +// +ID DMIS176 standard; DNA; INV; 7439 BP. +XX +AC X01472; J01060; J01061; +XX +DR FLYBASE; FBgn0000004; 17.6. +XX +FT source X01472:1..7439 +FT SO_feature five_prime_LTR ; SO:0000425:1..512 +FT SO_feature three_prime_LTR ; SO:0000426:6928..7439 +FT SO_feature TATA_box ; SO:0000174:372..377 +FT SO_feature TATA_box ; SO:0000174:7271..7277 +FT SO_feature primer_binding_site ; SO:0005850:511..529 +FT SO_feature polyA_signal_sequence ; SO:0000551:372..377 +FT SO_feature polyA_signal_sequence ; SO:0000551:7299.7304 +FT SO_feature RR_tract ; SO:0000435:6917..6927 +FT SO_feature CDS ; SO:0000316:1074..2393 +FT /name="17.6\gag" +FT /db_xref="FLYBASE:FBgn0044339" +FT /db_xref="SWISS-PROT:P04282" +FT /db_xref="NCBI_PROTEIN:CAA25701.1" +FT /translation="MAQEPAIVPPLSDSNMTQVAYQIGNVEKFNGDPGSLYTFVSRIDY +FT ILALYATGDERQQQIIFGHIERSISGEVMRCIGAYDMYTWQQLRRQLVLNYKPQTPNHV +FT LLEEFRKTPFRGNVRAFLEEAESRRQTLTSKLELEQDLEEKTFYLKLIKSSIESLIEKL +FT PTHIYLRINNHNIPDLRSLINLLQEKGMYEQINHTSTHVQKQNFSDKPQKSFNQNTNQS +FT NNIRKYPTPFLHYNSPIPYQAPQIYQTPPTNNPLYRHPIPYHPNPNNVFQPSQQNNVFQ +FT PSQQNNAFQPNQRTNFTSRPIFNTNRNNAFDQNRFGQQPQYQNQQSTQNSSSYVPNRPI +FT KRLRPANSGQTGMSVDETLYQEDAFYQQCVPYDYFYYPTYDHSDYYPENQYQIDENNQN +FT LQRTQQLQQINTDETNNDNQEPNVEQAENFQPQALENPNI" +FT SO_feature CDS ; SO:0000316:2345..5518 +FT /name="17.6\pol" +FT /db_xref="FLYBASE:FBgn0014453" +FT /db_xref="SWISS-PROT:P04323" +FT /db_xref="NCBI_PROTEIN:CAA25702.1" +FT /translation="TGRKFSATSLGKPQYITIKYKENNLKCLIDTGSTVNMTSKNIFDL +FT PIQNTSTFIHTSNGPLIVNKSIIIPSKILFPTTNEFLLHPFSENYDLLLGRKLLAEAKA +FT TISYRDQEVTLYNNKYKLIEGIATHEQSHFQNVNMIPDTMLRQPNKISPILESDLYRLE +FT HLNNEEKQRLCALLQKYHDIQYHEGDKLTFTNQTKHTINTKHNLPLYSKYSYPQAYEQE +FT VESQIQDMLNQGIIRTSNSPYNSPIWVVPKKQDASGKQKFRIVIDYRKLNEITVGDRHP +FT IPNMDEILGKLGRCNYFTTIDLAKGFHQIEMDPESVSKTAFSTKHGHYEYLRMPFGLKN +FT APATFQRCMNDILRPLLNKHCLVYLDDIIVFSTSLDEHLQSLGLVFEKLAKANLKLQLD +FT KCEFLKQETTFLGHVLTPDGIKPNPEKIEAIQKYPIPTKPKEIKAFLGLTGYYRKFIPN +FT FADIAKPMTKCLKKNMKIDTTNPEYDSAFKKLKYLISEDPILKVPDFTKKFTLTTDASD +FT VALGAVLSQDGHPLSYISRTLNEHEINYSTIEKELLAIVWATKTFRHYLLGRHFEISSD +FT HQPLSWLYRMKDPNSKLTRWRVKLSEFDFDIKYIKGKENCVADALSRIKLEETYLSEQT +FT QHSAEEDNSDLIFITERPLNTFNRQVIFSKGPPDIKVTKYFKKHITQIFYDIMTREKAE +FT QYLIDHFCGKKSALYIESDADFEVIQAAHKLAINTKYTKILRSTILLKNITTYAEFKEL +FT ILTAHEKLLHPGIQKTTKLFGETYYFPNSQLLIQNIINECSICNLAKTEHRNTDMPTKT +FT TPKPEHCREKFMIDIYSSEGKHYVSCIDIYSKFATLEEIKTKDWIECKNALMRIFNQLG +FT KPKLLKADRDGAFSSLALKRWLESEEVELQLNTTKTGVADIERLHKTINEKIRIIKTSD +FT DEETKLSKMETVLNIYNHKTKHDTTGQTPAHIFLYAGQPILDTQQNKENKINKINNDRV +FT EYEVDTRYRKGPLQKGKLENPFKPTKNVEQTDSDHYKITNRNRITHYYKTQFKKRKKNN +FT QLSISQAPGT" +FT SO_feature CDS ; SO:0000316:5488..6903 +FT /name="17.6\env" +FT /db_xref="FLYBASE:FBgn0027624" +FT /db_xref="SWISS-PROT:P04283" +FT /db_xref="NCBI_PROTEIN:CAA25703.1" +FT /translation="SALNFTGTWHLITLLLMLITTVHGQQIEINNIDTNHGYLLFSDKP +FT VQIPSSFEHHCLRINLTEIDTIADYFEQRLRTDYHAPQVKFLYNKMRRELAGIALRHRN +FT KRGLINIVGSVFKYLFGTLDENDRVDIQRKLETNAHNSVNLHELNDAIQLINDGMQKIQ +FT NYENNSNIINSLLYELMQFTEYIEDVEMGMQLSRLGLFNPKLLNYDKLENVNSQNILNI +FT KTSTWINYNDNQLLIISHIPINFSLINTVKIIPYPDSNGYQLEYTDTQSYFERENKVYN +FT NENKEINNECVTNIIKHLKPICNFESIHTDEIIKYIEPNTIVTWNLTQTSLKQNCQNSF +FT NNIKIKGNKMIKVTQCKIEINSIILSENLFKPEIDLTPLYTPLNITKIKTVKHNDINEM +FT ISQNNITLYIFMTTVIIILILLYLYLRYVSFNPFMMLYAKLKLRKNQNQNTAQQIEMED +FT VPLPLLYPSIPAQV" +XX +CC Derived from X01472 (g8142) (Rel. 36, Last updated, Version 2). +CC Takis Benos and Michael Ashburner, 20-Aug-1997. +CC Any changes to original sequence record are annotated in an FT line. +XX +SQ Sequence 7439 BP; 2985 A; 1512 C; 1048 G; 1894 T; 0 other; + AGTGACATAT TCACATACAA AACCACATAA CATAGAGTAA ACATATTGAA AAGCCGCATA 60 + CGTAAACAAT AAGTGACCAC CATGCTAATG TGGATCAAAT AACAAAAATA TCCACTCTGC 120 + ATTTTGACAC CCCCATACTG TATGCCATCT GCGCAGTATG CATTCTAATA AACAAATTCT 180 + TTGACAGCGG CACTTAGCCA TTCTTGTAAA CAAATCTTAA AGTCTGCCTG CTCTCTCTGA 240 + GGCTTCTCCT CCACTTAAGA ATCCAAGAGC AATGCTCTCC CAAAAACACT AACATATTCT 300 + TTAAGCAAGC ACAGAGGCTT CTCCTCATTT TCACTTTCAT TTGATTTTCA GTCTTAAGCT 360 + GAACGTTAAT CAATAAACAA CACAATCGAT ACCGAAATTT TGATTCGTTT TATTTTGGCA 420 + AAACTCAATT TTCAGCGTTG GTCTTAGTTC ATATTCGGAA CGGTCCATTT AATAGACTCA 480 + AAACTATTTA TTGCAACCAT TTATTTGCAA TTGGCGCAGT CGATGTGATC AGTGTTAAAG 540 + TTCCTTGATG CGGTAACCAG ATTTGCCAAT TCCTGTGTTC TTTTTGTTCT CTGACAAAAG 600 + TACCACGATA ACGGGCACCC ACGTGACGGT TAATATCGCT TTAAGTTTTT AATTAAACCT 660 + CGACAATAAA GTGAAACCGA AAAATCACAA TTTGCCTAAA CAAACCTGAA TTTATTATCA 720 + GGAAGACGCT ATTGAATTTG TGAGAGGCTG TAAATCCAAT TGGTTACCTC AAAGACCCAC 780 + GAAAAAGCTA TAGTGCAACC CTTGCGAAAA TCAAAACCTA TCTTAAAAAA AAAAAAAAAA 840 + TATAAATAAT AAATTAATAA GCGAAAATTA AAACGTATTA AAAGTAAGAA TAATAAATAA 900 + ATAAGTGAAA ATTCTATATG ATAAAAATTA AAAATAAGAA TAATAAATAA AAAGACAACA 960 + TTTTAAATTA AACAATATTA AAAAAATATA AAAATATTAA AAACTATATT AAAAAAAAAA 1020 + AAAAAACAAA AAAACAAAAA AAAAAAAATA AATAAATAAT CCAAAAATCA AAAATGGCTC 1080 + AAGAACCAGC AATTGTGCCA CCACTATCAG ACAGCAACAT GACCCAGGTT GCCTACCAGA 1140 + TTGGCAATGT GGAGAAATTC AACGGTGATC CAGGCTCACT ATACACCTTT GTGAGTCGAA 1200 + TTGATTACAT ACTGGCTCTT TATGCTACCG GAGATGAACG CCAACAGCAG ATCATATTTG 1260 + GGCATATTGA ACGCAGCATC AGCGGAGAAG TTATGCGCTG CATTGGAGCC TATGACATGT 1320 + ACACCTGGCA GCAGCTTAGA AGACAATTGG TACTCAACTA TAAACCCCAG ACCCCTAACC 1380 + ACGTTCTTTT AGAAGAGTTT CGAAAGACCC CATTTCGAGG CAATGTACGA GCATTCCTGG 1440 + AAGAAGCAGA AAGCCGCAGA CAAACACTTA CTAGTAAGCT TGAATTAGAG CAAGATCTTG 1500 + AAGAAAAGAC TTTTTATTTG AAATTAATAA AATCCAGTAT AGAATCACTA ATTGAAAAAT 1560 + TACCTACACA CATTTATTTA AGAATAAATA ACCACAACAT ACCAGATTTG CGATCACTTA 1620 + TAAACCTTTT ACAAGAGAAG GGCATGTACG AACAAATAAA TCATACAAGT ACACATGTCC 1680 + AAAAACAAAA TTTCTCTGAT AAGCCACAAA AGTCCTTTAA TCAAAATACT AATCAGTCTA 1740 + ACAATATCAG AAAATATCCA ACACCTTTCC TACATTATAA TTCACCAATA CCATATCAAG 1800 + CTCCACAAAT TTATCAAACA CCACCAACTA ATAACCCACT TTATCGTCAT CCAATACCCT 1860 + ACCACCCTAA TCCAAACAAT GTTTTTCAAC CAAGCCAACA AAACAATGTT TTCCAACCAA 1920 + GCCAACAAAA CAATGCTTTT CAACCAAATC AACGAACAAA CTTTACATCT CGACCAATTT 1980 + TTAACACCAA TCGAAACAAT GCATTCGATC AGAATAGGTT CGGACAACAA CCCCAATATC 2040 + AAAATCAACA ATCAACACAA AATTCAAGTT CCTATGTACC CAATCGACCA ATAAAACGAT 2100 + TAAGACCAGC TAATAGTGGA CAGACTGGGA TGAGTGTTGA CGAAACATTA TATCAAGAGG 2160 + ACGCTTTTTA TCAGCAGTGT GTTCCATATG ACTATTTTTA TTATCCAACT TACGACCATT 2220 + CAGACTATTA TCCAGAAAAT CAATATCAAA TTGACGAAAA CAACCAAAAT TTACAAAGAA 2280 + CACAACAGTT ACAGCAGATT AATACAGACG AGACAAACAA TGACAACCAA GAACCCAATG 2340 + TTGAACAGGC CGAAAATTTT CAGCCACAAG CCTTGGAAAA CCCCAATATA TAACAATTAA 2400 + ATACAAAGAA AATAATTTGA AATGCCTTAT TGATACCGGA TCAACAGTTA ACATGACATC 2460 + TAAAAATATA TTTGATTTAC CAATCCAGAA TACTAGTACT TTTATTCATA CCAGCAATGG 2520 + ACCGCTCATT GTCAACAAAA GTATAATCAT ACCTTCAAAG ATTTTGTTCC CAACAACAAA 2580 + TGAATTTTTA TTGCACCCTT TCTCTGAGAA TTACGATCTT TTATTAGGAA GAAAACTTTT 2640 + AGCAGAAGCA AAAGCAACAA TAAGTTACCG CGATCAAGAG GTAACTCTTT ACAACAACAA 2700 + ATACAAATTA ATAGAAGGAA TAGCAACACA TGAACAGAGT CATTTTCAAA ATGTAAATAT 2760 + GATACCTGAC ACCATGCTCA GACAGCCAAA TAAAATTTCA CCCATTTTAG AATCAGACCT 2820 + ATACAGATTG GAACATTTAA ATAACGAAGA AAAACAAAGA TTGTGCGCAC TCCTGCAGAA 2880 + ATACCATGAC ATACAGTACC ATGAAGGTGA TAAGTTGACA TTTACTAATC AAACCAAACA 2940 + TACTATCAAT ACAAAGCACA ATCTACCACT TTACTCTAAA TACAGTTACC CACAGGCTTA 3000 + TGAACAGGAG GTCGAAAGCC AAATACAAGA TATGCTAAAT CAAGGTATTA TACGTACCAG 3060 + TAATTCACCT TACAATAGCC CCATCTGGGT GGTTCCAAAG AAACAAGATG CATCAGGCAA 3120 + ACAGAAATTT AGAATTGTAA TAGACTACCG AAAATTAAAT GAAATAACAG TAGGAGACAG 3180 + ACACCCAATC CCAAACATGG ACGAAATCTT GGGAAAATTG GGCAGATGTA ATTACTTCAC 3240 + AACTATAGAC TTGGCAAAGG GTTTCCACCA GATCGAAATG GATCCAGAAT CAGTTTCAAA 3300 + GACAGCCTTT TCTACCAAGC ACGGTCATTA TGAATATTTG CGCATGCCAT TCGGATTAAA 3360 + AAACGCGCCA GCCACCTTTC AACGGTGCAT GAATGATATT TTAAGACCAC TCTTAAACAA 3420 + ACACTGTCTT GTGTATTTGG ACGACATAAT TGTATTCTCG ACATCCCTTG ATGAACACCT 3480 + GCAATCGCTC GGACTAGTTT TCGAAAAATT AGCAAAAGCC AACCTTAAAT TACAACTTGA 3540 + CAAATGTGAG TTTCTCAAGC AAGAAACCAC ATTTTTAGGA CATGTTCTAA CACCAGATGG 3600 + AATAAAACCA AACCCTGAAA AAATTGAAGC CATTCAAAAA TATCCAATTC CCACTAAACC 3660 + AAAAGAAATA AAAGCTTTTC TTGGACTGAC AGGATATTAT CGTAAATTTA TTCCAAACTT 3720 + TGCAGACATA GCCAAACCCA TGACTAAGTG TTTAAAAAAG AACATGAAAA TTGACACTAC 3780 + CAACCCAGAA TATGACTCTG CATTTAAAAA ATTAAAATAT CTAATATCAG AAGACCCAAT 3840 + TCTTAAAGTA CCCGACTTTA CAAAGAAATT CACTTTAACC ACAGACGCAA GTGATGTCGC 3900 + TTTGGGGGCA GTACTGTCAC AAGATGGACA CCCACTTAGC TACATTAGCC GAACACTTAA 3960 + TGAACACGAA ATAAATTACA GCACAATTGA AAAAGAACTC TTAGCAATTG TATGGGCGAC 4020 + AAAGACTTTT CGACACTACC TACTTGGAAG ACACTTTGAA ATATCCAGTG ACCATCAACC 4080 + ATTGAGCTGG TTGTACCGTA TGAAAGACCC AAATTCAAAA CTGACCCGAT GGAGAGTAAA 4140 + ATTATCCGAA TTCGATTTTG ATATAAAATA TATAAAAGGA AAAGAAAATT GCGTGGCGGA 4200 + TGCTCTGTCC AGAATAAAAC TTGAGGAGAC ATATTTGAGC GAACAAACCC AACATAGTGC 4260 + AGAAGAGGAC AATAGTGATT TAATTTTTAT TACAGAAAGA CCTCTAAATA CATTTAACAG 4320 + ACAAGTTATA TTTTCAAAAG GACCACCAGA CATTAAAGTT ACGAAATATT TCAAAAAACA 4380 + CATCACCCAA ATATTTTACG ACATTATGAC CAGGGAAAAA GCCGAACAAT ATTTGATAGA 4440 + CCATTTTTGT GGTAAGAAAA GTGCGTTGTA TATTGAGAGT GACGCTGATT TCGAAGTCAT 4500 + TCAAGCCGCA CATAAATTAG CCATAAACAC CAAATATACA AAAATCCTGC GTAGCACGAT 4560 + TTTGTTAAAA AACATAACCA CTTATGCGGA ATTTAAGGAA TTGATCTTGA CTGCTCATGA 4620 + AAAACTTCTA CACCCAGGCA TACAGAAAAC TACTAAACTT TTCGGAGAAA CTTACTATTT 4680 + CCCTAATAGC CAGCTACTTA TTCAGAATAT AATAAATGAG TGCAGTATTT GCAATCTGGC 4740 + AAAAACAGAG CACCGAAATA CAGACATGCC AACGAAAACC ACACCCAAAC CAGAACATTG 4800 + CCGCGAAAAA TTCATGATAG ACATTTACTC ATCCGAAGGC AAACATTACG TTAGTTGCAT 4860 + AGACATTTAT TCGAAATTTG CCACATTAGA AGAAATAAAA ACAAAAGACT GGATAGAATG 4920 + CAAAAACGCG CTTATGCGCA TATTCAACCA GCTTGGCAAG CCAAAGTTAC TAAAGGCGGA 4980 + CAGAGACGGC GCATTTTCCA GTTTAGCCCT CAAGAGATGG CTGGAGAGTG AGGAAGTCGA 5040 + ATTGCAGCTT AACACAACAA AAACTGGTGT GGCGGACATA GAAAGACTAC ATAAAACAAT 5100 + TAATGAAAAG ATTCGCATAA TCAAAACATC CGATGACGAA GAAACCAAAT TGAGCAAAAT 5160 + GGAAACAGTA CTTAACATAT ACAATCATAA AACCAAACAC GACACCACTG GACAGACCCC 5220 + TGCACACATA TTTCTCTACG CTGGACAACC AATATTAGAT ACCCAACAAA ACAAAGAAAA 5280 + CAAAATAAAC AAAATAAATA ATGACAGAGT GGAGTACGAA GTCGACACAA GATACAGAAA 5340 + AGGTCCACTA CAGAAAGGCA AATTAGAAAA TCCTTTTAAG CCAACAAAAA ATGTGGAGCA 5400 + GACTGACTCT GATCATTATA AAATTACTAA TAGAAATAGA ATTACTCACT ACTACAAAAC 5460 + ACAATTCAAA AAACGAAAGA AAAATAATCA GCTCTCAATT TCACAGGCAC CTGGCACTTG 5520 + ATAACATTGC TGCTGATGCT GATCACAACA GTTCATGGAC AACAAATTGA AATTAATAAT 5580 + ATTGACACAA ACCACGGATA TCTCCTTTTT TCTGATAAAC CAGTCCAGAT ACCATCATCC 5640 + TTTGAACATC ATTGCTTGAG AATCAATTTA ACTGAAATAG ACACCATAGC TGATTATTTT 5700 + GAGCAAAGAC TACGTACCGA CTACCATGCA CCCCAGGTCA AATTTTTATA CAACAAAATG 5760 + AGAAGAGAAC TAGCTGGAAT AGCCTTGCGA CATAGAAATA AACGGGGACT TATTAACATT 5820 + GTAGGTTCAG TTTTTAAATA CCTATTTGGC ACACTTGACG AAAATGATCG AGTGGATATA 5880 + CAGAGGAAAC TTGAAACAAA CGCCCATAAC TCGGTAAATT TACATGAACT CAATGACGCT 5940 + ATTCAATTAA TAAATGACGG AATGCAAAAG ATACAGAATT ATGAAAACAA CAGCAACATC 6000 + ATTAACAGTC TTTTATATGA ACTCATGCAG TTTACAGAAT ACATAGAAGA TGTGGAAATG 6060 + GGAATGCAGC TTTCCAGACT CGGTCTATTT AATCCCAAAC TACTAAACTA CGATAAACTT 6120 + GAGAATGTAA ACAGCCAAAA TATTTTAAAC ATTAAAACAT CCACTTGGAT TAATTACAAT 6180 + GATAACCAAT TATTAATCAT ATCTCACATA CCTATTAACT TTTCATTAAT AAATACAGTA 6240 + AAAATAATCC CTTACCCAGA CTCGAACGGC TATCAGCTAG AATACACAGA CACACAATCA 6300 + TATTTTGAAA GAGAAAATAA AGTTTACAAT AACGAAAATA AAGAAATAAA CAATGAGTGT 6360 + GTCACCAACA TTATTAAACA TTTAAAACCA ATTTGTAATT TTGAGTCAAT CCACACAGAT 6420 + GAAATAATAA AATACATAGA ACCAAACACA ATTGTAACCT GGAATTTAAC CCAAACAAGT 6480 + CTCAAACAAA ATTGTCAAAA TTCATTTAAT AATATAAAAA TAAAAGGAAA CAAAATGATA 6540 + AAAGTAACCC AATGTAAAAT AGAAATCAAT AGCATAATTC TAAGTGAAAA TCTCTTTAAA 6600 + CCAGAAATAG ATTTGACACC ATTATACACA CCACTTAACA TAACAAAAAT AAAAACTGTT 6660 + AAACACAACG ACATTAATGA AATGATTTCA CAAAACAATA TTACACTTTA CATATTTATG 6720 + ACTACTGTCA TCATTATACT TATTTTATTG TACTTATATT TAAGATACGT ATCATTTAAC 6780 + CCATTCATGA TGCTGTATGC AAAACTAAAA TTAAGAAAAA ATCAAAATCA AAACACAGCA 6840 + CAACAAATAG AAATGGAAGA CGTTCCATTA CCCCTACTAT ATCCATCAAT CCCAGCCCAA 6900 + GTATAGGCTT CTCTTTAAGG GAAGGGAAGT GACATATTCA CATACAAAAC CACATAACGT 6960 + AGAGTAAACA TATTGAAAAG CCGCATACGT CAACAATAAG TGACCACCAT GCTAATGTGG 7020 + ATCAAATAAC AAAAATATCC ACTCTGCATT TTGACACCCC CATACTGTAT GCCATCTGCG 7080 + CAGTATGCAT TCTAATAAAC AAATTCTTTG ACAGCGGCAC TTAGCCATTC TTGTAAACAA 7140 + ATCTTAAAGT CTGCCTGCTC TCTCTGAGGC TTCTCCTCCA CTTAAGAATC CAAGAGCAAT 7200 + GCTCTCCCAA AAACACTAAC ATATTCTTTA AGCAAGCACA GAGGCTTCTC CTCATTTTCA 7260 + CTTTCATTTG ATTTTCAGTC TTAAGCTGAA CGTTAATCAA TAAACAACAC AATCGATACC 7320 + GAAATTTTGA TTCGTTTTAT TTTGGCAAAA CTCAATTTTC AGCGTTGGTC TTAGTTCATA 7380 + TTCGGAACGG TCCATTTAAT AGACTCAAAA CTATTTATTG CAACCATTTA TTTGCAATT 7439 +// \ No newline at end of file diff -r 000000000000 -r bcdd1a35e545 test-data/output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Fri Apr 22 12:09:14 2016 -0400 @@ -0,0 +1,4 @@ +>DME9736 +GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAACACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGAATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCACTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCATCGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTTCAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATCCAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATATTAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCAGTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATTTAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCAGTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGGATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTTTTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGGAAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAAAATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCACAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTACGATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTATACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTAGTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAGAAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTAGAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATACTGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACCGAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTATATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAATTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAACGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAATCACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAGAATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAATCAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCACTTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCAGTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAAGCTGCCTCGGACACTCAAATGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTCATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAGAAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGACCAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAAAATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAGAAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTATTTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAGAAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCGCACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAATGCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCATGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACGAGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACGAATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAACATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAAGCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCAAATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTGGGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACGACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACTTTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTGCAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTTTAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCTACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAACACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAACTAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAAATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGACACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTAACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACACCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACCCGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACGTAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATGCCAGTAGAACGCTAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTATAGTTTGGGCTACAAAATATTTCAGGTCATACTTATTCGGCAGACCATTTGAAGTATTAAGTGATCACAAGCCACTGGTATGGCTCAACAACATTAAAGAACCAAACATGAAATTGCAAAGATGGAAAATAAAACTTAATGAATTCGATTATAAAATCAAATATCTTCCAGGCAAAGAAAACCATGTCGCGGATGCTCTTTCCCGCACGAAAATAGAAGTTATGGTTGGCGAGGTCGCAAATAGCGCAGACGCAACTATACACAGTGCCATTGAAGATAATCTAAATTACATACCCATAACAGAAAGACCAATAAATTACTTCTCTAGACAAATAGAGATAGAAAAAGGCGATAACGATACAACAAGTGTACAACATTTGTTTCAAAAATTAAAGATTAAGATAGTCTATAAAGAAATGACACCTGAACTCGCCAAAAACCTCATTAAGGAATATGTGTGCACCAAAAAGAGTGCAATTTATTTCCCTAATGACGAAGATTTTCTGATCTTCCAGAGAGCGTTTACCGAAATTATAAGCCCTAACAATTTCACAAAACTCTTGAGATGTACCACAAAGTTAATTGATATACTAACGTATGCAGAATTCAAAGATTTAATCTTAAAGAAACATAAGGAACTTTTACATCCGGGTATAGAAAAAACAATCAATTTATTTAAAGAAGAATATTACTATCCTGATAGTCAAAAGCTTATTCAAACCATTATCAATGAATGTCAAATTTGTTATCTAGCAAAAACGGAACATCAAACACAAATGACATATGAGACTACACCAGAAATATTTAACACAAGAGAAAAATACATGATAGATTTTTATCTCACAGGAAACCAGATCTTCTTATCTTGCATTGATATCTATTCGAAATTTGCATCACTAGTTGAATTAAAAAGTAGAGATTGGCTAGAAGCAAAAAGAGCCATTACTAAAATATTCAATGACATGGGAAAACCGCAAGAAATTAAAGCAGACAAAGACTCAGCTTTTATGTGTTTAGCCTTACAAAATTGGTTAAGATCTGAAGGTGTACAAATTTCTATAAGCACTAGCAAAAATGGTATATCTGATATAGAAAGATTCCACAAGACCGTAAACGAAAAGCTAAGAATCATTGGTAGCCAACAAAATGTTGAAGATAGGTGCACAAAATTCGAAAGAATTCTATACATATACAATCACAAAACTAAACATAATAGTACTAAAAGATTTCCAGCAGACATTTTCCTATATGCAGGCAGTCCAGATTTTAATGTACAACAAAACAAAATCGATAGGATAGAATACCTCAATAAGAATAGACACGATTTTGAAGTTGATATAAAATATAGACAAGCCCCACTTGTAAAAAGTAAAATAACCAATCCATTTAAAAAGACAGGAAGAATTGGACAAGTAGATGATAAACATTTCGAAGAACAAAATCGTGGCAGGAAGATCGTTCACTATAAGTCAAAATTTAAGAAACAGAAAAAGTTTAATAAGAGCAAATATGATAATTCCAGACCAACCAAAGAAGCACAAAGTACACAACATACTTCTAATAATGCTTAGTTGCATACTATCACTTATCATCACGGTCAAGTGCAACAATATAGAAGTAAATCCAGTAAACGCGAAAAATGGATACCTTATATTCCAAACAGGAACAATGGAAATTCCAACCAGCTATGAATACCATTATTTAAGCATAAACATAACAAAGACAATGCTCATGTTCGAAGATATAGTAAGTGAAGCAAACAACTATCCTAATGTACCACAAATACAATATTTAGTCGACAAATTAAAACGAGAAATAAATGGGTTAAGAATTATTAGTCGAAGTAAAAGAGGTCTTTTAAACGTAGTAGGAAAAGCATACAAATACTTATTCGGCACATTAGATGAGGATGACAGAGAAGAGTTAGAAGAAAAAATAAACAACATGTCAGAAGACTCTGTAAAAACCCATGACCTAAACACGATTCTAGATGTAATCAATAGTGGTATAGATATAATTAATAAGCTCAAAGTAGATAAAGAACAACACCAACAAATTGCGGTACTAATATTTAACCTAGAGCAATTTACAGAATATATAGAAGACATAGAATTGGGTCTGCAATTAACCAGACTAGGAATTTTCAATCCAAGATTACTAAAGCATGACTATTTAAAACATGTAAATTCAGAAAAAATGCTAAAGATAAAAACGTCAACCTGGCTTAAAACAGACACGAACGAAATTTTGATTATTTCCCATATTCCTAGCGAAGTTACTAAAGTTCCAATATTCCAAATTGTTCCGTACCCAGATGAACATAATTATATTCTAACCGAGCAAATATTCGATAAATTCTACATATTTGATAACCAAGTATTCCATAAAGATACCAATAGGGATATATTCGACAAATGTATTATTGGAATCATCAAACAAGAGCAAACTCAATGCAAATATATTAAAACACATAAAAATTACCAAATAAATTATATAGAACCAAATATACTATTAACATGGAATATTCCTGAAACAGCTGTTAACCAAGACTGTACACACAATAAAATATTAATTTCAGGAAACAACATCATTAAAATTAAAAATTGTACCATACAAATAGATGAATTCTTAATCTCTAATAATCTAGCAGACTTTACACAAACAATTTATATCACCAACAATGTAACACGTCTAGAACCAATAAATCACTTACAAACGAGAGAAATGATAGAAACCCATGTAAAACACTATAACTTTTTTCAAATTATATGCATTACAACGTTCGTCATAATGATAATTAGTTTGACTCTGTATGTAGCATATAAGTTTAAAAATATACCTAAGAAAATTATTGTCAATATCGTAAGCAAAAAGAACACACGCACCTTGAAAATAATGTCAATGAAAATATTCAACAAGGAAATAATATTACCTTATACCCAAATTTAACGACCTGAGGACAGGCCAAATTCAAAGGTTGGGGGAGTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAACACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGAATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCACTGGTAGACTAAACATCCGTTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCATCGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTTCAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATCCAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATATTAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCAGTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATTTAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATT +>DMIS176 +AGTGACATATTCACATACAAAACCACATAACATAGAGTAAACATATTGAAAAGCCGCATACGTAAACAATAAGTGACCACCATGCTAATGTGGATCAAATAACAAAAATATCCACTCTGCATTTTGACACCCCCATACTGTATGCCATCTGCGCAGTATGCATTCTAATAAACAAATTCTTTGACAGCGGCACTTAGCCATTCTTGTAAACAAATCTTAAAGTCTGCCTGCTCTCTCTGAGGCTTCTCCTCCACTTAAGAATCCAAGAGCAATGCTCTCCCAAAAACACTAACATATTCTTTAAGCAAGCACAGAGGCTTCTCCTCATTTTCACTTTCATTTGATTTTCAGTCTTAAGCTGAACGTTAATCAATAAACAACACAATCGATACCGAAATTTTGATTCGTTTTATTTTGGCAAAACTCAATTTTCAGCGTTGGTCTTAGTTCATATTCGGAACGGTCCATTTAATAGACTCAAAACTATTTATTGCAACCATTTATTTGCAATTGGCGCAGTCGATGTGATCAGTGTTAAAGTTCCTTGATGCGGTAACCAGATTTGCCAATTCCTGTGTTCTTTTTGTTCTCTGACAAAAGTACCACGATAACGGGCACCCACGTGACGGTTAATATCGCTTTAAGTTTTTAATTAAACCTCGACAATAAAGTGAAACCGAAAAATCACAATTTGCCTAAACAAACCTGAATTTATTATCAGGAAGACGCTATTGAATTTGTGAGAGGCTGTAAATCCAATTGGTTACCTCAAAGACCCACGAAAAAGCTATAGTGCAACCCTTGCGAAAATCAAAACCTATCTTAAAAAAAAAAAAAAAATATAAATAATAAATTAATAAGCGAAAATTAAAACGTATTAAAAGTAAGAATAATAAATAAATAAGTGAAAATTCTATATGATAAAAATTAAAAATAAGAATAATAAATAAAAAGACAACATTTTAAATTAAACAATATTAAAAAAATATAAAAATATTAAAAACTATATTAAAAAAAAAAAAAAAACAAAAAAACAAAAAAAAAAAAATAAATAAATAATCCAAAAATCAAAAATGGCTCAAGAACCAGCAATTGTGCCACCACTATCAGACAGCAACATGACCCAGGTTGCCTACCAGATTGGCAATGTGGAGAAATTCAACGGTGATCCAGGCTCACTATACACCTTTGTGAGTCGAATTGATTACATACTGGCTCTTTATGCTACCGGAGATGAACGCCAACAGCAGATCATATTTGGGCATATTGAACGCAGCATCAGCGGAGAAGTTATGCGCTGCATTGGAGCCTATGACATGTACACCTGGCAGCAGCTTAGAAGACAATTGGTACTCAACTATAAACCCCAGACCCCTAACCACGTTCTTTTAGAAGAGTTTCGAAAGACCCCATTTCGAGGCAATGTACGAGCATTCCTGGAAGAAGCAGAAAGCCGCAGACAAACACTTACTAGTAAGCTTGAATTAGAGCAAGATCTTGAAGAAAAGACTTTTTATTTGAAATTAATAAAATCCAGTATAGAATCACTAATTGAAAAATTACCTACACACATTTATTTAAGAATAAATAACCACAACATACCAGATTTGCGATCACTTATAAACCTTTTACAAGAGAAGGGCATGTACGAACAAATAAATCATACAAGTACACATGTCCAAAAACAAAATTTCTCTGATAAGCCACAAAAGTCCTTTAATCAAAATACTAATCAGTCTAACAATATCAGAAAATATCCAACACCTTTCCTACATTATAATTCACCAATACCATATCAAGCTCCACAAATTTATCAAACACCACCAACTAATAACCCACTTTATCGTCATCCAATACCCTACCACCCTAATCCAAACAATGTTTTTCAACCAAGCCAACAAAACAATGTTTTCCAACCAAGCCAACAAAACAATGCTTTTCAACCAAATCAACGAACAAACTTTACATCTCGACCAATTTTTAACACCAATCGAAACAATGCATTCGATCAGAATAGGTTCGGACAACAACCCCAATATCAAAATCAACAATCAACACAAAATTCAAGTTCCTATGTACCCAATCGACCAATAAAACGATTAAGACCAGCTAATAGTGGACAGACTGGGATGAGTGTTGACGAAACATTATATCAAGAGGACGCTTTTTATCAGCAGTGTGTTCCATATGACTATTTTTATTATCCAACTTACGACCATTCAGACTATTATCCAGAAAATCAATATCAAATTGACGAAAACAACCAAAATTTACAAAGAACACAACAGTTACAGCAGATTAATACAGACGAGACAAACAATGACAACCAAGAACCCAATGTTGAACAGGCCGAAAATTTTCAGCCACAAGCCTTGGAAAACCCCAATATATAACAATTAAATACAAAGAAAATAATTTGAAATGCCTTATTGATACCGGATCAACAGTTAACATGACATCTAAAAATATATTTGATTTACCAATCCAGAATACTAGTACTTTTATTCATACCAGCAATGGACCGCTCATTGTCAACAAAAGTATAATCATACCTTCAAAGATTTTGTTCCCAACAACAAATGAATTTTTATTGCACCCTTTCTCTGAGAATTACGATCTTTTATTAGGAAGAAAACTTTTAGCAGAAGCAAAAGCAACAATAAGTTACCGCGATCAAGAGGTAACTCTTTACAACAACAAATACAAATTAATAGAAGGAATAGCAACACATGAACAGAGTCATTTTCAAAATGTAAATATGATACCTGACACCATGCTCAGACAGCCAAATAAAATTTCACCCATTTTAGAATCAGACCTATACAGATTGGAACATTTAAATAACGAAGAAAAACAAAGATTGTGCGCACTCCTGCAGAAATACCATGACATACAGTACCATGAAGGTGATAAGTTGACATTTACTAATCAAACCAAACATACTATCAATACAAAGCACAATCTACCACTTTACTCTAAATACAGTTACCCACAGGCTTATGAACAGGAGGTCGAAAGCCAAATACAAGATATGCTAAATCAAGGTATTATACGTACCAGTAATTCACCTTACAATAGCCCCATCTGGGTGGTTCCAAAGAAACAAGATGCATCAGGCAAACAGAAATTTAGAATTGTAATAGACTACCGAAAATTAAATGAAATAACAGTAGGAGACAGACACCCAATCCCAAACATGGACGAAATCTTGGGAAAATTGGGCAGATGTAATTACTTCACAACTATAGACTTGGCAAAGGGTTTCCACCAGATCGAAATGGATCCAGAATCAGTTTCAAAGACAGCCTTTTCTACCAAGCACGGTCATTATGAATATTTGCGCATGCCATTCGGATTAAAAAACGCGCCAGCCACCTTTCAACGGTGCATGAATGATATTTTAAGACCACTCTTAAACAAACACTGTCTTGTGTATTTGGACGACATAATTGTATTCTCGACATCCCTTGATGAACACCTGCAATCGCTCGGACTAGTTTTCGAAAAATTAGCAAAAGCCAACCTTAAATTACAACTTGACAAATGTGAGTTTCTCAAGCAAGAAACCACATTTTTAGGACATGTTCTAACACCAGATGGAATAAAACCAAACCCTGAAAAAATTGAAGCCATTCAAAAATATCCAATTCCCACTAAACCAAAAGAAATAAAAGCTTTTCTTGGACTGACAGGATATTATCGTAAATTTATTCCAAACTTTGCAGACATAGCCAAACCCATGACTAAGTGTTTAAAAAAGAACATGAAAATTGACACTACCAACCCAGAATATGACTCTGCATTTAAAAAATTAAAATATCTAATATCAGAAGACCCAATTCTTAAAGTACCCGACTTTACAAAGAAATTCACTTTAACCACAGACGCAAGTGATGTCGCTTTGGGGGCAGTACTGTCACAAGATGGACACCCACTTAGCTACATTAGCCGAACACTTAATGAACACGAAATAAATTACAGCACAATTGAAAAAGAACTCTTAGCAATTGTATGGGCGACAAAGACTTTTCGACACTACCTACTTGGAAGACACTTTGAAATATCCAGTGACCATCAACCATTGAGCTGGTTGTACCGTATGAAAGACCCAAATTCAAAACTGACCCGATGGAGAGTAAAATTATCCGAATTCGATTTTGATATAAAATATATAAAAGGAAAAGAAAATTGCGTGGCGGATGCTCTGTCCAGAATAAAACTTGAGGAGACATATTTGAGCGAACAAACCCAACATAGTGCAGAAGAGGACAATAGTGATTTAATTTTTATTACAGAAAGACCTCTAAATACATTTAACAGACAAGTTATATTTTCAAAAGGACCACCAGACATTAAAGTTACGAAATATTTCAAAAAACACATCACCCAAATATTTTACGACATTATGACCAGGGAAAAAGCCGAACAATATTTGATAGACCATTTTTGTGGTAAGAAAAGTGCGTTGTATATTGAGAGTGACGCTGATTTCGAAGTCATTCAAGCCGCACATAAATTAGCCATAAACACCAAATATACAAAAATCCTGCGTAGCACGATTTTGTTAAAAAACATAACCACTTATGCGGAATTTAAGGAATTGATCTTGACTGCTCATGAAAAACTTCTACACCCAGGCATACAGAAAACTACTAAACTTTTCGGAGAAACTTACTATTTCCCTAATAGCCAGCTACTTATTCAGAATATAATAAATGAGTGCAGTATTTGCAATCTGGCAAAAACAGAGCACCGAAATACAGACATGCCAACGAAAACCACACCCAAACCAGAACATTGCCGCGAAAAATTCATGATAGACATTTACTCATCCGAAGGCAAACATTACGTTAGTTGCATAGACATTTATTCGAAATTTGCCACATTAGAAGAAATAAAAACAAAAGACTGGATAGAATGCAAAAACGCGCTTATGCGCATATTCAACCAGCTTGGCAAGCCAAAGTTACTAAAGGCGGACAGAGACGGCGCATTTTCCAGTTTAGCCCTCAAGAGATGGCTGGAGAGTGAGGAAGTCGAATTGCAGCTTAACACAACAAAAACTGGTGTGGCGGACATAGAAAGACTACATAAAACAATTAATGAAAAGATTCGCATAATCAAAACATCCGATGACGAAGAAACCAAATTGAGCAAAATGGAAACAGTACTTAACATATACAATCATAAAACCAAACACGACACCACTGGACAGACCCCTGCACACATATTTCTCTACGCTGGACAACCAATATTAGATACCCAACAAAACAAAGAAAACAAAATAAACAAAATAAATAATGACAGAGTGGAGTACGAAGTCGACACAAGATACAGAAAAGGTCCACTACAGAAAGGCAAATTAGAAAATCCTTTTAAGCCAACAAAAAATGTGGAGCAGACTGACTCTGATCATTATAAAATTACTAATAGAAATAGAATTACTCACTACTACAAAACACAATTCAAAAAACGAAAGAAAAATAATCAGCTCTCAATTTCACAGGCACCTGGCACTTGATAACATTGCTGCTGATGCTGATCACAACAGTTCATGGACAACAAATTGAAATTAATAATATTGACACAAACCACGGATATCTCCTTTTTTCTGATAAACCAGTCCAGATACCATCATCCTTTGAACATCATTGCTTGAGAATCAATTTAACTGAAATAGACACCATAGCTGATTATTTTGAGCAAAGACTACGTACCGACTACCATGCACCCCAGGTCAAATTTTTATACAACAAAATGAGAAGAGAACTAGCTGGAATAGCCTTGCGACATAGAAATAAACGGGGACTTATTAACATTGTAGGTTCAGTTTTTAAATACCTATTTGGCACACTTGACGAAAATGATCGAGTGGATATACAGAGGAAACTTGAAACAAACGCCCATAACTCGGTAAATTTACATGAACTCAATGACGCTATTCAATTAATAAATGACGGAATGCAAAAGATACAGAATTATGAAAACAACAGCAACATCATTAACAGTCTTTTATATGAACTCATGCAGTTTACAGAATACATAGAAGATGTGGAAATGGGAATGCAGCTTTCCAGACTCGGTCTATTTAATCCCAAACTACTAAACTACGATAAACTTGAGAATGTAAACAGCCAAAATATTTTAAACATTAAAACATCCACTTGGATTAATTACAATGATAACCAATTATTAATCATATCTCACATACCTATTAACTTTTCATTAATAAATACAGTAAAAATAATCCCTTACCCAGACTCGAACGGCTATCAGCTAGAATACACAGACACACAATCATATTTTGAAAGAGAAAATAAAGTTTACAATAACGAAAATAAAGAAATAAACAATGAGTGTGTCACCAACATTATTAAACATTTAAAACCAATTTGTAATTTTGAGTCAATCCACACAGATGAAATAATAAAATACATAGAACCAAACACAATTGTAACCTGGAATTTAACCCAAACAAGTCTCAAACAAAATTGTCAAAATTCATTTAATAATATAAAAATAAAAGGAAACAAAATGATAAAAGTAACCCAATGTAAAATAGAAATCAATAGCATAATTCTAAGTGAAAATCTCTTTAAACCAGAAATAGATTTGACACCATTATACACACCACTTAACATAACAAAAATAAAAACTGTTAAACACAACGACATTAATGAAATGATTTCACAAAACAATATTACACTTTACATATTTATGACTACTGTCATCATTATACTTATTTTATTGTACTTATATTTAAGATACGTATCATTTAACCCATTCATGATGCTGTATGCAAAACTAAAATTAAGAAAAAATCAAAATCAAAACACAGCACAACAAATAGAAATGGAAGACGTTCCATTACCCCTACTATATCCATCAATCCCAGCCCAAGTATAGGCTTCTCTTTAAGGGAAGGGAAGTGACATATTCACATACAAAACCACATAACGTAGAGTAAACATATTGAAAAGCCGCATACGTCAACAATAAGTGACCACCATGCTAATGTGGATCAAATAACAAAAATATCCACTCTGCATTTTGACACCCCCATACTGTATGCCATCTGCGCAGTATGCATTCTAATAAACAAATTCTTTGACAGCGGCACTTAGCCATTCTTGTAAACAAATCTTAAAGTCTGCCTGCTCTCTCTGAGGCTTCTCCTCCACTTAAGAATCCAAGAGCAATGCTCTCCCAAAAACACTAACATATTCTTTAAGCAAGCACAGAGGCTTCTCCTCATTTTCACTTTCATTTGATTTTCAGTCTTAAGCTGAACGTTAATCAATAAACAACACAATCGATACCGAAATTTTGATTCGTTTTATTTTGGCAAAACTCAATTTTCAGCGTTGGTCTTAGTTCATATTCGGAACGGTCCATTTAATAGACTCAAAACTATTTATTGCAACCATTTATTTGCAATT