#!/usr/bin/env python
#
# Provenance, recovered from the Mercurial changeset header that wrapped this script:
#   HG changeset patch
#   User public-health-bioinformatics
#   Date 1573616856 18000
#   Node ID c917ef6807d7dab73705e63ea109022ede2145c6
#   "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/match_plasmid_to_reference commit 0f3fff91eb329adf437224eb8f7449853083b01e"
#   diff -r 000000000000 -r c917ef6807d7 match_plasmid_to_reference.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/match_plasmid_to_reference.py Tue Nov 12 22:47:36 2019 -0500
#   @@ -0,0 +1,156 @@
#
# Purpose: find the MOB-typer report row that describes a plasmid assembly
# (same contig count, same total length, G/C percent within a tolerance) and
# copy the reference plasmid named by that row's ``mash_nearest_neighbor``
# column into the output directory, together with the plasmid itself and the
# matching report row(s).

from __future__ import division, print_function

import argparse
import csv
import errno
import os
import re
import shutil


# Column names applied, in order, to every row of the MOB-typer report.
MOB_TYPER_FIELDNAMES = [
    "file_id",
    "num_contigs",
    "total_length",
    "gc",
    "rep_type(s)",
    "rep_type_accession(s)",
    "relaxase_type(s)",
    "relaxase_type_accession(s)",
    "mpf_type",
    "mpf_type_accession(s)",
    "orit_type(s)",
    "orit_accession(s)",
    "PredictedMobility",
    "mash_nearest_neighbor",
    "mash_neighbor_distance",
    "mash_neighbor_cluster",
    "NCBI-HR-rank",
    "NCBI-HR-Name",
    "LitRepHRPlasmClass",
    "LitPredDBHRRank",
    "LitPredDBHRRankSciName",
    "LitRepHRRankInPubs",
    "LitRepHRNameInPubs",
    "LitMeanTransferRate",
    "LitClosestRefAcc",
    "LitClosestRefDonorStrain",
    "LitClosestRefRecipientStrain",
    "LitClosestRefTransferRate",
    "LitClosestConjugTemp",
    "LitPMIDs",
    "LitPMIDsNumber",
]


def parse_mob_typer_report(mob_typer_report_path):
    """Read a tab-separated MOB-typer report into a list of row dicts.

    The rows are keyed by ``MOB_TYPER_FIELDNAMES``.  Because the field names
    are supplied explicitly, a header line present in the file is returned as
    an ordinary row; ``main`` filters such rows out when matching.

    :param mob_typer_report_path: path of the TSV report
    :return: list of dicts, one per line of the report
    """
    with open(mob_typer_report_path) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        return list(reader)


def parse_genbank_accession(genbank_path):
    """Return the accession from the first ``ACCESSION`` line of a GenBank file.

    :param genbank_path: path of the GenBank file
    :return: the second whitespace-separated token of the first line that
        starts with ``ACCESSION``; ``None`` when the file has no such line or
        the line carries no value
    """
    with open(genbank_path, 'r') as f:
        # Iterating the file stops at EOF; a readline() loop would spin
        # forever on a file without an ACCESSION line.
        for line in f:
            if line.startswith('ACCESSION'):
                fields = line.split()
                return fields[1] if len(fields) > 1 else None
    return None


def parse_fasta_accession(fasta_path):
    """Return the identifier of the first record of a FASTA file.

    :param fasta_path: path of the FASTA file
    :return: the first whitespace-separated token of the first header line
        with the leading ``>`` removed; ``None`` when the file has no header
    """
    with open(fasta_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                return line.strip().split()[0][1:]
    return None


def count_fasta_contigs(fasta_path):
    """Count the records (lines starting with ``>``) in a FASTA file."""
    with open(fasta_path, 'r') as f:
        return sum(1 for line in f if line.startswith('>'))


def count_fasta_bases(fasta_path):
    """Sum the lengths of all whitespace-stripped non-header lines of a FASTA file."""
    bases = 0
    with open(fasta_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line.startswith('>'):
                bases += len(line)
    return bases


def compute_fasta_gc_percent(fasta_path):
    """Return the G/C content of a FASTA file as a percentage (0-100).

    Upper- and lower-case ``G``/``C`` are counted; every other character on a
    sequence line counts towards the total only.

    :param fasta_path: path of the FASTA file
    :return: ``100 * (G + C) / total`` as a float, or ``0.0`` when the file
        holds no sequence characters (avoids a ``ZeroDivisionError``)
    """
    gc_count = 0
    total_bases_count = 0
    with open(fasta_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                continue
            line = line.strip()
            upper = line.upper()
            gc_count += upper.count('C') + upper.count('G')
            total_bases_count += len(line)
    if total_bases_count == 0:
        return 0.0
    return 100 * (gc_count / total_bases_count)


def _ensure_directory(path):
    """Create ``path`` (including missing parents) unless it already is a directory."""
    try:
        os.makedirs(path)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
            raise


def _record_matches_plasmid(record, num_contigs, num_bases, gc_percent, gc_tolerance=0.1):
    """Tell whether a report row describes a plasmid with the given statistics.

    A row matches when its ``num_contigs`` and ``total_length`` equal the
    given counts and its ``gc`` differs from ``gc_percent`` by less than
    ``gc_tolerance``.  Rows whose numeric columns are missing or not numeric
    (for example a header line) never match.
    """
    try:
        return (
            num_contigs == int(record['num_contigs'])
            and num_bases == int(record['total_length'])
            and abs(gc_percent - float(record['gc'])) < gc_tolerance
        )
    except (TypeError, ValueError):
        return False


def _accession_matches(expected, accession):
    """Tell whether ``accession`` is ``expected``, optionally followed by a suffix.

    The comparison is a literal prefix match that must end on a token
    boundary, so ``CP008719`` matches ``CP008719`` and ``CP008719.1`` but not
    ``CP0087190``.  An empty or missing value on either side never matches.
    """
    if not expected or not accession:
        return False
    # re.escape keeps '.' and other metacharacters in the accession literal.
    pattern = re.escape(expected) + r'(?![0-9A-Za-z])'
    return re.match(pattern, accession) is not None


def main(args):
    """Match the plasmid against the MOB-typer report and collect the outputs.

    Written into ``args.outdir``:

    * ``mob_typer_record.tsv`` - header plus every report row that matches
      the plasmid
    * ``reference_plasmid.gbk`` / ``reference_plasmid.fasta`` - the reference
      file whose accession equals the matching row's
      ``mash_nearest_neighbor`` (when several qualify, the last one copied
      wins)
    * ``plasmid.fasta`` - a copy of ``args.plasmid``

    :param args: namespace with ``plasmid``, ``mob_typer_report``, ``outdir``
        and the optional lists ``reference_plasmids_genbank`` and
        ``reference_plasmids_fasta``
    """
    # create output directory
    _ensure_directory(args.outdir)

    # parse mob_typer report
    mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
    num_plasmid_contigs = count_fasta_contigs(args.plasmid)
    num_plasmid_bases = count_fasta_bases(args.plasmid)
    plasmid_gc_percent = compute_fasta_gc_percent(args.plasmid)

    # The nargs='+' options are None when they are not given on the command line.
    genbank_references = args.reference_plasmids_genbank or []
    fasta_references = args.reference_plasmids_fasta or []

    with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f:
        # extrasaction='ignore': surplus columns that DictReader stores under
        # the key None must not abort the write.
        mob_typer_record_writer = csv.DictWriter(
            f,
            delimiter="\t",
            quotechar='"',
            fieldnames=MOB_TYPER_FIELDNAMES,
            extrasaction='ignore',
        )
        mob_typer_record_writer.writeheader()
        for record in mob_typer_report:
            # match the plasmid against three properties in the MOB-Typer report:
            # 1. number of contigs
            # 2. total length of all contigs
            # 3. G/C percent (within +/-0.1%)
            if not _record_matches_plasmid(
                record, num_plasmid_contigs, num_plasmid_bases, plasmid_gc_percent
            ):
                continue

            nearest_neighbor = record['mash_nearest_neighbor']

            for reference_plasmid in genbank_references:
                if nearest_neighbor and parse_genbank_accession(reference_plasmid) == nearest_neighbor:
                    shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk"))

            for reference_plasmid in fasta_references:
                if _accession_matches(nearest_neighbor, parse_fasta_accession(reference_plasmid)):
                    shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.fasta"))

            mob_typer_record_writer.writerow(record)

    shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--plasmid", required=True, help="plasmid assembly (fasta)")
    parser.add_argument("--reference_plasmids_genbank", nargs='+', help="reference plasmids (genbank)")
    parser.add_argument("--reference_plasmids_fasta", nargs='+', help="reference plasmids (fasta)")
    parser.add_argument("--mob_typer_report", required=True, help="mob_typer reports (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)

# ---------------------------------------------------------------------------
# Remainder of the changeset text that followed the script on the same lines.
#
#   diff -r 000000000000 -r c917ef6807d7 match_plasmid_to_reference.xml
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/match_plasmid_to_reference.xml Tue Nov 12 22:47:36 2019 -0500
#   @@ -0,0 +1,57 @@
#   (apparently the XML markup did not survive extraction; the only text left
#   in this hunk is the tool description:
#   "Pick plasmids containing specific genes")
#
#   diff -r 000000000000 -r c917ef6807d7 test-data/CP008719.fa
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/test-data/CP008719.fa Tue Nov 12 22:47:36 2019 -0500
#   @@ -0,0 +1,33 @@
#   +>CP008719.1 Escherichia coli strain ST648 plasmid pEC648_5, complete sequence
#   +TAGATTTAAACGGTATCAAGTTTGGATTTTTAAGAACGCATTCTTAGTTCTGGAAAAGAGCCAGCGGCAG
# ---------------------------------------------------------------------------
+GCTGAGGTGATAGGTACGAGATTGCATGCAATCTCTAGTGCTCTGTCTATCCTGCATTATCCTCAGCATT +ATCCTCAGCATTATCCTCAGCCTTGCCAACTCGACACCAATGCAGGATAGACAATCCGATGTCAAATGTT +AACACTCTGCGAGTGGTACATTTTCCCCGGATTATCGTCCTGAGCCTGCCGCTGGCTCTCTTTCTACCGC +CTCGCTTTGCTCGTTGCTCAACGCCTCACAGACACGGATTAAAATCCGCATCCGTTCACCGTTTTTTAAA +GTCCGTTAAAAGCATGATGCCATCTCCGAGAGTTAATCTCGTCAAATGCTAAATCGTGGGGGTCCCCTTT +GGGGTTCCGATTTAGTGATTGACGACACCACCGATTAAAAAACTTATGCGGGGTGGATGGTTTCACGAAG +TGAGGCCATCCACCTGTAAGACAGGGTTTTGTTTTTATTCCCTGTTTTGGTGATCGGGTGTGTGGAAAAG +GTTGGGGTAAGCCGTTCGGGGGTGCTTGTTTTGGGGGGTTAAAATTGTGGTTATTTTTTGCGCAATTCTC +GCGCGTGATCCTTGTATTTATACTTAAGGGATAAATGGCGGATATGAAATAGTGGTTTAGCCCAGTAATG +ACGAGGCTTTGAGTGGGTTTTGACAGGTCAAAGAAAATGGAGCAGAATTGAGGCGTTTTTAATCGGCGTT +GGGGAGTGCGTCAACACTCCCCAACATTTCGAATGTGTCACCTCAGCGGCAAACTCTGGTGACATGTACT +GGCTCGCAATGCACAGGTACGTGATGAATATACCACATCAAATCACAGCCTGCCCAGATCGGAGCAGGCT +TAATGTCAGAAGATAAATTCCTTTCGGACTACAGCCCCCGTGATGCAGTTTGGGATACCCAGCGCACGCT +TACCGATTCTGTCGGGGGTATCTACCAGACTGCTGCTGAATTCGAGCGCTATGCACTCCGTATGGCCTCC +TGTAGCGGTTTGTTACGTTTTGGTTGGTCTACCATCATGGAAACCGGAGAAACGCGCCTACGGCTTCGTA +GTGCGCAATTTTGCCGTGTCCGTCATTGCCCTGTCTGCCAGTGGAGAAGAACCCTCATGTGGCAAGCCCG +TTTTTATCAGGCTCTACCGAAAATCGTTGTGGATTACCCGTCTTCCCGATGGTTGTTTCTGACGTTAACT +GTCAGGAACTGCGAGATAGGTGAACTTGGAACAGTCCTTACAGCAATGAATGCGGCGTTTAAGCGAATGG +AAAAGCGAAAGGAGCTATCACCTGTTCAGGGGTGGATCAGGGCTACGGAGGTGACGCGAGGTAAGGATGG +CAGCGCACATCCGCATTTTCACTGTCTGCTGATGGTGCAACCTTCTTGGTTTAAAGGGAAGAACTACGTT +AAGCACGAACGTTGGGTAGAACTCTGGCGCGATTGCTTGCGGGTGAACTATGAGCCGAATATCGATATTC +GGGCAGTAAAAACTAAGACAGGTGAGGTTGTGGCCAACGTTGCCGAGCAACTGCAAAGCGCGGTTGCTGA +AACGCTGAAATACTCCGTTAAACCGGAAGATATGGCAAACGATCCTGAGTGGTTTCTTGAGCTGACGCGG +CAGCTTCACAAGCGCCGTTTTATCTCGACCGGTGGGGCGCTAAAAAACGTCCTCCAGTTGGATCGAGAAA +CCAATGAGGATCTTGTCATTGCCGACGATGTAGGGGATGGCACTGATGACGGGAAGCGGACGGCGTTTGT +CTGGGATTCAGGTAAACGGCGTTACAAACGCGCCCCTGAGAAGGATAAATCGGATTAACGTATGAATATT +AATATTGAATACCTGAATGGAAATAAGACTATTGGTTTATTTTTTTTAAGAAGTGAAGCGGTGATTCCTG 
+ACAGGTTTAAAAACCTTATTTTGCTTATTGATGGATTAAGTTTTGGCACATTTGGTTTTCATCCGCACGA +AGGTTTTGAGGATGAATTAATTTTATATATTCAGAAAACAAACGAGAGGGTAAAAACTCTTTTTGTGAAA +A + diff -r 000000000000 -r c917ef6807d7 test-data/CP008719.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CP008719.gbk Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,130 @@ +LOCUS CP008719 2101 bp DNA circular BCT 05-JAN-2016 +DEFINITION Escherichia coli strain ST648 plasmid pEC648_5, complete sequence. +ACCESSION CP008719 +VERSION CP008719.1 +DBLINK BioProject: PRJNA248607 + BioSample: SAMN02800875 +KEYWORDS . +SOURCE Escherichia coli + ORGANISM Escherichia coli + Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; + Enterobacteriaceae; Escherichia. +REFERENCE 1 (bases 1 to 2101) + AUTHORS Gao,Z. + TITLE Complete Genome Sequence of Multiple Antibiotic Resistant + Escherichia coli Isolated from Pleural Effusion of Patients with + Empyema Thoracis + JOURNAL Unpublished +REFERENCE 2 (bases 1 to 2101) + AUTHORS Gao,Z. + TITLE Direct Submission + JOURNAL Submitted (09-JUN-2014) Department of Respiratory and Critical care + medicine, Peking University People's hospital, Xizhimen South + Street, Beijing, Beijing 100101, China +COMMENT Annotation was added by the NCBI Prokaryotic Genome Annotation + Pipeline (released 2013). Information about the Pipeline can be + found here: http://www.ncbi.nlm.nih.gov/genome/annotation_prok/ + + ##Genome-Assembly-Data-START## + Assembly Method :: Newbler v. 2.3; Consed + Genome Coverage :: 40x; 350x + Sequencing Technology :: Roche 454 GS FLX; Illumina Hiseq 2000 + ##Genome-Assembly-Data-END## + + ##Genome-Annotation-Data-START## + Annotation Provider :: NCBI + Annotation Date :: 06/13/2014 09:01:46 + Annotation Pipeline :: NCBI Prokaryotic Genome Annotation + Pipeline + Annotation Method :: Best-placed reference protein set; + GeneMarkS+ + Annotation Software revision :: 2.6 (rev. 
437579) + Features Annotated :: Gene; CDS; rRNA; tRNA; ncRNA; + repeat_region + Genes :: 4,807 + CDS :: 4,578 + Pseudo Genes :: 132 + CRISPR Arrays :: 2 + rRNAs :: 21 (5S, 16S, 23S) + tRNAs :: 72 + ncRNA :: 4 + Frameshifted Genes :: 116 + ##Genome-Annotation-Data-END## +FEATURES Location/Qualifiers + source 1..2101 + /organism="Escherichia coli" + /mol_type="genomic DNA" + /strain="ST648" + /db_xref="taxon:562" + /plasmid="pEC648_5" + gene join(1882..2101,1..47) + /locus_tag="FH07_00765" + CDS join(1882..2101,1..47) + /locus_tag="FH07_00765" + /inference="EXISTENCE: similar to AA + sequence:RefSeq:WP_001024536.1" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /protein_id="ALV71377.1" + /translation="MNINIEYLNGNKTIGLFFLRSEAVIPDRFKNLILLIDGLSFGTF + GFHPHEGFEDELILYIQKTNERVKTLFVKIDLNGIKFGFLRTHS" + gene 913..1878 + /locus_tag="FH07_00775" + CDS 913..1878 + /locus_tag="FH07_00775" + /inference="EXISTENCE: similar to AA + sequence:RefSeq:WP_012421200.1" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /codon_start=1 + /transl_table=11 + /product="Replication protein" + /protein_id="ALV71376.1" + /translation="MSEDKFLSDYSPRDAVWDTQRTLTDSVGGIYQTAAEFERYALRM + ASCSGLLRFGWSTIMETGETRLRLRSAQFCRVRHCPVCQWRRTLMWQARFYQALPKIV + VDYPSSRWLFLTLTVRNCEIGELGTVLTAMNAAFKRMEKRKELSPVQGWIRATEVTRG + KDGSAHPHFHCLLMVQPSWFKGKNYVKHERWVELWRDCLRVNYEPNIDIRAVKTKTGE + VVANVAEQLQSAVAETLKYSVKPEDMANDPEWFLELTRQLHKRRFISTGGALKNVLQL + DRETNEDLVIADDVGDGTDDGKRTAFVWDSGKRRYKRAPEKDKSD" +ORIGIN + 1 tagatttaaa cggtatcaag tttggatttt taagaacgca ttcttagttc tggaaaagag + 61 ccagcggcag gctgaggtga taggtacgag attgcatgca atctctagtg ctctgtctat + 121 cctgcattat cctcagcatt atcctcagca ttatcctcag ccttgccaac tcgacaccaa + 181 tgcaggatag acaatccgat gtcaaatgtt aacactctgc gagtggtaca ttttccccgg + 241 attatcgtcc tgagcctgcc gctggctctc tttctaccgc ctcgctttgc tcgttgctca + 301 acgcctcaca gacacggatt aaaatccgca tccgttcacc gttttttaaa gtccgttaaa + 361 agcatgatgc catctccgag agttaatctc gtcaaatgct aaatcgtggg ggtccccttt + 421 ggggttccga tttagtgatt gacgacacca ccgattaaaa aacttatgcg gggtggatgg + 481 tttcacgaag tgaggccatc cacctgtaag acagggtttt gtttttattc cctgttttgg + 541 tgatcgggtg tgtggaaaag gttggggtaa gccgttcggg ggtgcttgtt ttggggggtt + 601 aaaattgtgg ttattttttg cgcaattctc gcgcgtgatc cttgtattta tacttaaggg + 661 ataaatggcg gatatgaaat agtggtttag cccagtaatg acgaggcttt gagtgggttt + 721 tgacaggtca aagaaaatgg agcagaattg aggcgttttt aatcggcgtt ggggagtgcg + 781 tcaacactcc ccaacatttc gaatgtgtca cctcagcggc aaactctggt gacatgtact + 841 ggctcgcaat gcacaggtac gtgatgaata taccacatca aatcacagcc tgcccagatc + 901 ggagcaggct taatgtcaga agataaattc ctttcggact acagcccccg tgatgcagtt + 961 tgggataccc agcgcacgct taccgattct gtcgggggta tctaccagac tgctgctgaa + 1021 ttcgagcgct atgcactccg tatggcctcc tgtagcggtt tgttacgttt tggttggtct + 1081 accatcatgg aaaccggaga aacgcgccta cggcttcgta gtgcgcaatt ttgccgtgtc + 1141 cgtcattgcc ctgtctgcca gtggagaaga accctcatgt ggcaagcccg tttttatcag + 1201 gctctaccga aaatcgttgt ggattacccg tcttcccgat ggttgtttct gacgttaact + 1261 gtcaggaact gcgagatagg 
tgaacttgga acagtcctta cagcaatgaa tgcggcgttt + 1321 aagcgaatgg aaaagcgaaa ggagctatca cctgttcagg ggtggatcag ggctacggag + 1381 gtgacgcgag gtaaggatgg cagcgcacat ccgcattttc actgtctgct gatggtgcaa + 1441 ccttcttggt ttaaagggaa gaactacgtt aagcacgaac gttgggtaga actctggcgc + 1501 gattgcttgc gggtgaacta tgagccgaat atcgatattc gggcagtaaa aactaagaca + 1561 ggtgaggttg tggccaacgt tgccgagcaa ctgcaaagcg cggttgctga aacgctgaaa + 1621 tactccgtta aaccggaaga tatggcaaac gatcctgagt ggtttcttga gctgacgcgg + 1681 cagcttcaca agcgccgttt tatctcgacc ggtggggcgc taaaaaacgt cctccagttg + 1741 gatcgagaaa ccaatgagga tcttgtcatt gccgacgatg taggggatgg cactgatgac + 1801 gggaagcgga cggcgtttgt ctgggattca ggtaaacggc gttacaaacg cgcccctgag + 1861 aaggataaat cggattaacg tatgaatatt aatattgaat acctgaatgg aaataagact + 1921 attggtttat tttttttaag aagtgaagcg gtgattcctg acaggtttaa aaaccttatt + 1981 ttgcttattg atggattaag ttttggcaca tttggttttc atccgcacga aggttttgag + 2041 gatgaattaa ttttatatat tcagaaaaca aacgagaggg taaaaactct ttttgtgaaa + 2101 a +// + diff -r 000000000000 -r c917ef6807d7 test-data/JQ739157.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/JQ739157.fa Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,96 @@ +>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence +AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG +ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG +GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT +CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA +ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT +CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC +GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA +TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC +CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT +TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT 
+AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA +ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG +ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC +CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA +AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA +AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT +GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA +GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA +GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA +AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG +TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG +TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA +CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG +TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT +ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG +GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT +ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG +CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA +TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT +TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG +TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG +ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC +CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT +GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG +AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG +GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT +CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG 
+ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG +TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA +TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC +GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC +CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT +CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG +CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG +CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA +TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG +GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC +CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC +GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG +AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG +CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG +ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC +ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC +GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT +TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC +CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC +GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG +GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG +CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG +GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG +GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG +CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT +GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG +GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA 
+TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC +GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC +ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC +AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG +TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC +GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT +TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG +AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT +GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC +GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG +GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA +CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT +AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA +ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC +TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA +AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT +AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA +TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG +AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC +GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC +TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC +TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA +ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC +TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA +AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG +ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA +CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA 
+GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA +AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT +CAAACCGTATAGGATTGCGTACTTCAGGACATTG + diff -r 000000000000 -r c917ef6807d7 test-data/JQ739157.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/JQ739157.gbk Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,214 @@ +LOCUS JQ739157 6544 bp DNA circular BCT 26-JUL-2016 +DEFINITION Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete + sequence. +ACCESSION JQ739157 +VERSION JQ739157.2 +KEYWORDS . +SOURCE Acinetobacter pittii + ORGANISM Acinetobacter pittii + Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; + Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii + complex. +REFERENCE 1 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. and Yu,Y. + TITLE Epidemiological characteristics and genetic structure of blaNDM-1 + in non-baumannii Acinetobacter spp. in China + JOURNAL J. Antimicrob. Chemother. 67 (9), 2114-2122 (2012) + PUBMED 22604448 +REFERENCE 2 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. and Yu,Y. + TITLE Direct Submission + JOURNAL Submitted (05-MAR-2012) Department of Infectious Diseases, Sir Run + Run Shaw Hospital, Affiliated with School of Medicine, Zhejiang + University, 3 East Qingchun Road, Hangzhou, Zhejiang 310016, China +REFERENCE 3 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. and Yu,Y. + TITLE Direct Submission + JOURNAL Submitted (12-MAY-2014) Department of Infectious Diseases, Sir Run + Run Shaw Hospital, Affiliated with School of Medicine, Zhejiang + University, 3 East Qingchun Road, Hangzhou, Zhejiang 310016, China + REMARK Sequence update by submitter +COMMENT On May 12, 2014 this sequence version replaced JQ739157.1. 
+FEATURES Location/Qualifiers + source 1..6544 + /organism="Acinetobacter pittii" + /mol_type="genomic DNA" + /strain="ABCA95" + /db_xref="taxon:48296" + /plasmid="pABCA95" + gene 375..1031 + /gene="aphA6" + misc_feature 375..1031 + /gene="aphA6" + /note="similar to aphA6; aminoglycoside + 3'-phosphotransferase; 5' end of gene is missing" + mobile_element 1129..2215 + /mobile_element_type="insertion sequence:ISAba125" + repeat_region 1138..1154 + /note="IRL" + /rpt_type=inverted + CDS 1186..2211 + /codon_start=1 + /transl_table=11 + /product="Transposase" + /protein_id="AFK73837.1" + /translation="MEYIKLSYHHLNFEDRTALMLESRKEGFSARKFAELIKRHPSTI + YRELKRNSINDVYQARYASDNTFARRRRGHRKLKIDSILWKFIVEAIRCLWSPQQIAK + RLKTFPDLDQTMNVSHTTIYSTIRALPKGELKKDLLSCLRHENKKRKANGEPKKDSIL + QDIKTIHERPAEVQERKIPGHWEADLIKGKDNKSSIATLIERNTRLCILATLPDAKAE + SVRKALTEALKYLPAELRKTLTYDRGREMSEHKILEEDLGIDVYFCDPHSPWQKGTCE + NMNGLIRQYLPKGIDLNQADQHYLNQVAMSLNTRPRKALDWLTPLEKFAQLVDYHMAF + ETVAPHV" + repeat_region 2190..2206 + /note="IRL" + /rpt_type=inverted + gene 2312..3124 + /gene="blaNDM-1" + CDS 2312..3124 + /gene="blaNDM-1" + /codon_start=1 + /transl_table=11 + /product="metallo-beta-lactamase" + /protein_id="AFK73838.1" + /translation="MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQ + RFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLVVDTAWTDDQTAQIL + NWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGMVAAQHS + LTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSL + GNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR" + gene 3128..3493 + /gene="bleo" + CDS 3128..3493 + /gene="bleo" + /note="involved in resistance to bleomycin" + /codon_start=1 + /transl_table=11 + /product="bleomycin resistance protein" + /protein_id="AFK73839.1" + /translation="MADHVTPNLPSRDFDVTEAFYAKLGFATSWKDRGWMILQRGGLQ + LEFFPYPDLDPATSSFGCCLRLDDLDAMVALVNAAGAEEKSTGWPRFKAPQLEASGLR + IGYLIDPDCTLVRLIQNPD" + gene 3498..4133 + /gene="trpF" + misc_feature 3498..4133 + /gene="trpF" + /note="similar to TrpF; phosphoribosyl anthranilate + 
isomerase; 3' end not present" + gene complement(4150..4830) + /gene="tat" + misc_feature complement(4150..4830) + /gene="tat" + /note="similar to TAT; twin-arginine translocation pathway + signal sequence protein; 5' end of gene is missing." +ORIGIN + 1 aagcttatag cagtgtcaca gatgcgaaaa agcaattaag tgcatatttt gagttttata + 61 atttgaaacg acctcattcg agtctagaca aaatgacacc aaatgagttt tactatgatc + 121 agctacccca acaaaacaag gtggcttaac tagagcggaa tatcacttat aaatacgctt + 181 ttagttgttc aaacaagtgg gaccacctct ctcgcatttg cggggttttt aatgctgaat + 241 aaaaggaaaa cttgatggaa ttgcccaata ttattcaaca atttattgga aacagcgttt + 301 tagagccaaa taaaattggt cagtcgccat cggatgttta ttcttttaat cgaaataatg + 361 aaactttttt tcttaagcga tctagcactt tatatacaga gaccacatac agtgtctctc + 421 gcgaagcgaa aatgttgagt tggctctctg agaaattaaa ggtgcctgaa ctcatcatga + 481 cttttcagga tgagcagttt gaattaatga tcactaaagc gatcaatgca aaaccaattt + 541 cagcgctttt tttaacagac caagaattgc ttgctatcta taaggaggca ctcaatctgt + 601 taaattcagt tgctattatt gattgtccat ttatttcaaa cattgatcat cggttaaaag + 661 agtcaaaatt ttttattgat aaccaactcc ttgacgatat agatcaagat gattttgacg + 721 ctgaattatg gggagaccat agaacttacc taagtctatg gaatgagtta actgagactc + 781 gtgttgaaga aagattggtt ttttctcatg gcgatatcac ggatagtaat atttttatag + 841 ataaattcaa tgaaatttac tttttagatc ttggccgtgc tgggttagct gatgaatttg + 901 tagatatatc ctttgttgaa cgttgcctaa gagaggatgc ctcggaggaa actgctaaaa + 961 tatttttaaa gcatttaaaa aatgatagac ctgacaaaag gaattatttt ttaaaacttg + 1021 atgaattgaa ttgattctaa gcattatcta aaaatactta attgtctttt aacgtcgcta + 1081 aattttaaat aaataagtga agagtgttag tggagccact gatttaaagt tggcagagta + 1141 aaacttgaag tgcgacataa accacctaat taatttaaag ggtttatgga gtatataaaa + 1201 ttgtcatacc atcatcttaa ctttgaagat cgtactgcat taatgcttga gtcaagaaaa + 1261 gaaggctttt cagccagaaa atttgctgaa ctcattaaaa gacatcctag tacgatctat + 1321 cgtgagctta aaagaaatag catcaatgac gtttatcaag ctcgatatgc ttctgataac + 1381 accttcgcta gacgtagacg tggtcacaga aaactcaaaa tcgattcaat cctctggaaa + 1441 
tttattgttg aagcgatccg ttgtttatgg tctcctcagc aaatagcaaa gcgtttaaag + 1501 acatttcctg atttggatca aacaatgaat gtaagccata caacgattta ttcaacgata + 1561 cgagcattac caaagggtga gttgaaaaaa gacttattat cctgtctacg tcatgaaaat + 1621 aaaaagcgaa aagctaacgg tgaacctaaa aaagattcta tattacagga tattaaaact + 1681 attcatgagc gcccagccga agttcaagaa agaaaaatac cgggtcattg ggaagctgat + 1741 ttaattaaag gtaaagacaa taaaagttcg atagcaacac ttattgaacg aaatacacgg + 1801 ctctgtatct tggcaacatt acctgatgca aaggcagaat cagtgcgcaa ggctttaact + 1861 gaagctctga aatatttacc tgcagaactg cgtaaaacgt tgacctatga ccgtggacgt + 1921 gagatgtcag aacataaaat actcgaagaa gatttaggca tagatgtata tttctgtgac + 1981 ccacattcac cctggcaaaa aggcacatgc gaaaatatga atggtttaat taggcaatat + 2041 ttacctaaag ggattgattt aaatcaggca gatcagcatt atttaaatca agttgccatg + 2101 tcactgaata ctcgtcctag aaaggcgtta gattggctta caccattaga gaaatttgct + 2161 cagcttgttg attatcatat ggcttttgaa actgtcgcac ctcatgtttg aattcgcccc + 2221 atatttttgc tacagtgaac caaattaaga tcatctattt actaggcctc gcatttgcgg + 2281 ggtttttaat gctgaataaa aggaaaactt gatggaattg cccaatatta tgcacccggt + 2341 cgcgaagctg agcaccgcat tagccgctgc attgatgctg agcgggtgca tgcccggtga + 2401 aatccgcccg acgattggcc agcaaatgga aactggcgac caacggtttg gcgatctggt + 2461 tttccgccag ctcgcaccga atgtctggca gcacacttcc tatctcgaca tgccgggttt + 2521 cggggcagtc gcttccaacg gtttgatcgt cagggatggc ggccgcgtgc tggtggtcga + 2581 taccgcctgg accgatgacc agaccgccca gatcctcaac tggatcaagc aggagatcaa + 2641 cctgccggtc gcgctggcgg tggtgactca cgcgcatcag gacaagatgg gcggtatgga + 2701 cgcgctgcat gcggcgggga ttgcgactta tgccaatgcg ttgtcgaacc agcttgcccc + 2761 gcaagagggg atggttgcgg cgcaacacag cctgactttc gccgccaatg gctgggtcga + 2821 accagcaacc gcgcccaact ttggcccgct caaggtattt taccccggcc ccggccacac + 2881 cagtgacaat atcaccgttg ggatcgacgg caccgacatc gcttttggtg gctgcctgat + 2941 caaggacagc aaggccaagt cgctcggcaa tctcggtgat gccgacactg agcactacgc + 3001 cgcgtcagcg cgcgcgtttg gtgcggcgtt ccccaaggcc agcatgatcg tgatgagcca + 3061 ttccgccccc gatagccgcg 
ccgcaatcac tcatacggcc cgcatggccg acaagctgcg + 3121 ctgagccatg gctgaccacg tcacccccaa tctgccatcg cgcgatttcg atgtgacaga + 3181 ggcgttttat gcgaagctgg gctttgcgac gagttggaag gatcgcggct ggatgatcct + 3241 gcagcgcggc ggtttgcagc tcgaattctt cccctatcct gacctcgacc cagctacgag + 3301 ctcgttcggc tgttgcctgc ggttggatga tctcgatgcc atggtggcat tggtgaacgc + 3361 ggcgggagcc gaggaaaaaa gcaccggctg gccgcgcttc aaagctccgc aactggaggc + 3421 gagcggcctg aggatcggct acctgatcga tcccgactgc acgctggtgc ggctgatcca + 3481 gaaccccgac tgaccgcatg cccgcgaaaa tcaagatttg cgggatcagc acacccgagg + 3541 cgctcgatgc gaccatcgcg gcgcgggcgg actatgccgg gttggtgttc tatccagcgt + 3601 cgccccgtgc ggttacgtcg aatgtcgcgg gcgctttgac atcgcgcgca gctggccaga + 3661 tcgccatggt cggtttgttc gtcgatgcgg atgatgctgt catcgccgac gcactggtgg + 3721 cagccaagct gaacgcgctg cagctgcacg gttcggaatc gcccgaacgc gtggcccagt + 3781 tgcgcgcgcg gtttggcaag ccggtgtgga aggcgctgcc cgtcgccagc gccagcgatg + 3841 tcgcacgcgc cgcagcctat gccggggcgg cggacttgat cttgttcgac gccaagaccc + 3901 ccaaaggcgc gctgcccggc ggcatggggt tggcgttcga ctggtcgctg ctggccggat + 3961 atcgcggtgc cttgccgtgg gggctggcag gcgggctaaa tccgacgaat gttgccgagg + 4021 cgattgcgcg caccggagcg ccgctggtcg atacctccag cggcgtcgaa agcgcgccgg + 4081 gcgtcaagga taccgacaag attaccaatt tcgcctttgc ggtgcgcttg gcctaaatcg + 4141 cgtcgatcaa taggcgtcgt tcagcgcaaa gatcggcttg cgggtgcgcc actgccctcg + 4201 ggtgaagtcg ggaaaatcta acgtgcgatt gccctcagca atcgattgtt ccgacagagg + 4261 cgtgatcgcg ctccaggcca gcgcgtcgta aatgtcgatt ggcatcgggg ccttggcctt + 4321 cagcgcctcg acaaaagcgt ggatcacgaa ccagtccatc ccgccatgcc cggcccctgc + 4381 cgccagatcg gcgtagcgtt tccatagcgg gtgatcgtat ttcgcaaacc agccctcggc + 4441 aggctcccag cggtgcggct gtgggctctt gccctccaga tagatcgact tgttgacgtc + 4501 catccacagc ccctcggtgc cttgcacccg aaagccgaga gaataggggc gcggcagcga + 4561 ggtgtcgtgg cacagcatga tcgtttcacc attagtgcag ccgatcatgg tgttgaccac + 4621 atcacccagt gcgaatttca cctcggcgtt gggatgatcg gcagagccgt tcttgacgac + 4681 ataatcatgc agcccgcgcg ccttacagcc gaagccgcca 
gcgcccgctt cgcccggcaa + 4741 cgcgaccttc agggtgcggg tctgcggcgg gtagcacacg ccggcatcgg cgcagccctg + 4801 gtacttcacg gtcagggtgg tcgcgctcgc gccggccgcg ggcgtgccgg tgagggtgcc + 4861 gagcaattcc ttgcggtagg tttcgacgtc gccgaagaat tcgtcgcggt aggccttgcc + 4921 cttcggcagc gccatggtcg cgccggtgaa ggcggcatcg gccttgaccg aggtgcggtg + 4981 ccggtacagg taatagccgt cggcgatccg ccagcgcacc tcgatgcggt ccggcgcggt + 5041 ggcctgcgcg gacaggacga agacctcgtc gaccggcggc agttcgaagt cctgggcgac + 5101 ggccgaggtc gcgggcagcg caagcagcag ggcgagcccg gccagccagc ggcgcaggcg + 5161 gatcgtggat gcggtcattg gctcagttta ccggtcggct ctcggcggcc agccattgca + 5221 ggtattcggg caggccggac gcggcttcga ccgcgagcag ctccgggagt tcgtagggat + 5281 atagtatttt attaaattct tatgggaaat gacgaatgtt aaattatctt aagagcttta + 5341 ataatatcaa tacttatttg attttatcga taattctgct gttaatcata atatctctag + 5401 attatttcta aactgaatga atgtttataa tgagtgattc atattgctat tgaaatcgcc + 5461 ttctcacttt gaaagaaggc gaggatgagg gacttttatg ttgaattatc attttaaaaa + 5521 tgccttataa aagaagctta atgtgttttc ttatataggt ttaaacataa ttgttgtata + 5581 tcttaaatcc aattgatctt aaaattttcc tttatttttt gttatgagtg cgagaaaatt + 5641 gtcaaaaagg tcaatcagac tgggcgttaa tttgttttgc atactttttc ctatatcgaa + 5701 ttaaagtcat ataactaaca ccataatctt tagctatttg agtgaaaggg tatgaatcgt + 5761 ccttattttt aagggtatga attaactctt ttagtttttc ttctgtaatc gcaggcgatc + 5821 ttcccttgta tttacctttc ttttttttag ctaatttaat tccctctgct tgattctcac + 5881 taataatacc cctttcaagt tcagctacag cgcctaatac atggagttga aacttatcga + 5941 acttgtcatc tgaattgggg gtaaagttca ggttattttt gacaatatga acagacactc + 6001 cttttttatt tagcttttga acaatggtta caaggtcaat caagctacgt gccaatctaa + 6061 aaacatcatg agcgtacaca atgtccccac tacggacata atcgaacatt tcctgaagtg + 6121 cagggcgttt ggcagtcttt ccgctaaaat gatcaataaa agttttatct agctcaaagg + 6181 gtagatcatg gagctgtctt tcagggtttt ggtctttagt ggatacacgg atatacccca + 6241 ctctttgaaa gggtgtgttt ttaatttgat cttcaatatc taaattttct ttttccataa + 6301 ccagtataac aaaattagat aacctcaatg ttatatcaca ttagattaac aaaacaaccc + 6361 
tattgttata gggtttttag ggtgtattat tatataacaa tagggtatac cctattgtta + 6421 tatatcttca ggtataagga aaaataacga tgattaattt taatgatcta agcgaatctg + 6481 aattattaag gattgcacag actggcatat caaaccgtat aggattgcgt acttcaggac + 6541 attg +// + diff -r 000000000000 -r c917ef6807d7 test-data/SRR9113487_plasmid_2719.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SRR9113487_plasmid_2719.fasta Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,4 @@ +>53_length=9674_depth=5.42x +GTTTACTCATACCTAGATTCTACGTCAGTACTTCAAAAAGCATAATCAAAGCCTTGATAAATATGCATTCCTTCGAAATTCAGCTTTCACCCATTGGGTGAAAGAAAAGTGCTCAACATAAAATACTCGAAGAAGATTTAGGCATAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAATTAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATGTCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTGATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAACCAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTGAGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTGGCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTTCGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGTTGGTCGATACCGCCTGGACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGGTGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTATGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGCTGGTTGCGGCGCAACACAGCCTGACTTTCGCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCCCCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGATCAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCGCGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCGCCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAATCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAGGATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACCCAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCA
TGGTGGCATTGGTGAACGCGGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTGAGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATGCCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGGACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGACATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGACGCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGTTGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGCCGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGCGGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAGGCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAGCGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTGGCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCGGGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCGCTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGTGGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGGGTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGATAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGCGCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCACATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGCAGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGGTCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGCGCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAATTCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCGAGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGTGGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTCGCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTGGCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGACCGCGAGCGGGTATAGGAAGTATAAACCACCTTTTTGCTCCTCATCCGAAGTATCTTACCTGAAATTCCCTCACTCGTTTACCGCTCAAGC
CCCAATTTTAACTGCCGGTCCAGCCTAAACCGCTCTAATAAGGTTCGATTTGGCGGTAAAATCTCTAGCCTGATAGCTCGAGAGATACAAACTGCCCCACCGCCCCGTTTAAAAGTTGGCAGTGTTGAGCAGTGTTGGATTTGGGGTCGTCAGTCAAAGAGACGACTCTGTGATGGATCGAACAGGCTGGGAGTCAGTGGCGGCGCTCGTTCTGGTGGCAGCTCACGCTGCTTGGCGGCATTCGCCTTGGCTGTTTTCTGTTTCAGATGCTTGAGAATCTGCTCAATGACCTTCGGATCTTCGATGCTGGCAATCACTTTGACGTGACCGCCGCAGTGTTCGCAGACTTCAATATCAATATTGAAGACTCGCTTGAGGCGTTGCATCCAGGTCATGGCGCGGTGGCGCTCTGCAGGACTCTTGTCACGCCAGTTAGTATCGAGACCTTCCGATTTGTCGGGCTTCTTGCCCCGCTTGGCGGGTGTTACTTGAACTCGGTGTTTGCTGTTCGGTGCAAAGACGCCGTGGAAGCGTGTGAGGTTGACTCGCGGCTTAGGTACCAACGCAGCGAGTTTGGCGATGAAGTCCAGCGGCTCGAAGATCACATGGGTGGTGCCATTGCGGTACGGAGTTTTGAGCTCGTAACGCACCTGCCCATTGGCGGTTAATGCCAGACGTTTTTCTGAAACCGCTGGCCGACTAATGTAGCGACACAAGCGCTCAAGCTTATCCCGCTGATGCGCTTCGGCCATCACACCGGCGTGTAGCGAGAAACCAGCATGGTTGGCTACTCGACTGCTTGAGTCGGCTTTATCCTCACGCCCTGGCAAGGTTTGCAGGGTGAAGACTTTGCGCCCTTGCTGGGGGCCGACGGCAATGCGATACGTAACCGAAGCACCATGTAATTGAGTCAGCGTATCGTCTTCGCCCTCTTCCAGTGTCAACCACGTATTCTCGGCATCACGCTCCAAAATCCCACGCTTTTCCATGCAGCGAGCGATGCGATGGCTGAGGGTGTGAGCGAGCGTATTCAGCTCATCGTAAGTGGGTGCCTTGACACGATGGAAGCGTTGCTTGCCATAGTCATCTTCGGCATAGACACCATCGAGAAACAGCATGTGGTAGTGGACATTGAGATTTAGCGCGGAGCCAAAGCGTTGGATAAGAGTCACTGAGCCAGTTTGTGCAGAGGCTTTGGTGTAACCGGCTTTTTTGATCAGATGAGTTGAGAGTGTACGATAGACGATACTCAAGACCTGGCCCATCAGCTGGGGATGGCGAGCCAGCAAAAAGCGTAGCTGGAAAGGAAAGCTGAGCACCCACTGGCGAATGGGCTCCTTGGGGAAGACTTCGTCTATCAGCAGCGCCGCACTCTCGGCCATCCGGCGGGCACCGCAGCTAGGGCAAAAGCCGCGTCGTTTACAGCTGAAGGCGACCAGACGCTCGTGATGACAATCCTCGCAGCGAACCCGCATGAAACCATACTCCAGACGGCCACATTGGAGGAGGTCGTTGAATTCTTGTTGGATGTAGCGAGGCAGGTGTTGACCTTGGGCTTCGAGTGAGGCTTTGAAGGCTGGGTAGTGCTGCTCAACCAGCTGGTAGAGCAGCGTCTGGTCGGGTTGGTGGCGTTCGTAACCGTTTGTTTGAGTGGGCGATTGACTCGCCGTGGCGTTCCTTGCCAGCGACATGGGTATCCTCCGCTGATACTGTGGTTATGTACAGTATCAGCGGCTTGCGTTCAGACGTCCAGTCTGGCCCTAGACATCGCTAAATGCTTAACCCGCAATAGCCCTCACGAGTTGTTATCAGCCACTACCGGTTGAGCGAGAAGGTTTTGGGTTCAGGGTGCTATTGCTCCACCAATCACAATACTGAAGCCCCAACTGTTATCAGTTGGGGCTTTTTCTTGTCTGTTTGCGGCGGTTGCGTTTTATCGGTAGTCGTCGAGCTCTGCACCATCCCACATAAGAGCTTAACGGTGCGATCTTCAACGCCAT
CACACAAAACTTTCTTTTTCACGCACAGTCAACTTATTGGATGTTTTATTAACAACCCAAAAGGAGATATTTAGCGGGCGGCCGGAAGGTGAATGCTAGGCATGATCTAACCCTCGGTCTCTGGCGTCGCGACTGCGAAATTTCGCGAGGGTTTCCGAGAAGGTGATTGCGCTTCGCAGATCTCCAGGCGCGTGGGTGCGGACGTAGTCAGCGCCATTGCCGATCGCGTGAAGTTCCGCCGCAAGGCTCGCTGGACCCAGATCCTTTACAGGAAGGCCAACGGTGGCGCCCAAGAAGGATTTCCGCGACACCGAGACCAATAGCGGAAGCCCCAACGCCGACTTCAGCTTTTGAAGGTTCGACAGCACGTGCAGCGATGTTTCCGGTGCGGGGCTCAAGAAAAATCCCATCCCCGGATCGAGGATGAGCCGGTCGGCAGCGACCCCGCTCCGTCGCAAGGCGGAAACCCGCGCCTCGAAGAACCGCACAATCTCGTCGAGCGCGTCTTCGGGTCGAAGGTGACCGGTGCGGGTGGCGATGCCATCCCGCTGCGCTGAGTGCATAACCACCAGCCTGCAGTCCGCCTCAGCAATATCGGGATAGAGCGCAGGGTCAGGAAATCCTTGGATATCGTTCAGGTAGCCCACGCCGCGCTTGAGCGCATAGCGCTGGGTTTCCGGTTGGAAGCTGTCGATTGAAACACGGTGCATCTGATCGGACAGGGCGTCTAAGAGCGGCGCAATACGTCTGATCTCATCGGCCGGCGATACAGGCCTCGCGTCCGGATGGCTGGCGGCCGGTCCGACATCCACGACGTCTGATCCGACTCGCAGCATTTCGATCGCCGCGGTGACAGCGCCGGCGGGGTCTAGCCGCCGGCTCTCATCGAAGAAGGAGTCCTCGGTGAGATTCAGAATGCCGAACACCGTCACCATGGCGTCGGCCTCCGCAGCGACTTCCACGATGGGGATCGGGCGAGCAAAAAGGCAGCAATTATGAGCCCCATACCTACAAAGCCCCACGCATCAAGCTTTTGCCCATGAAGCAACCAGGCAATGGCTGTAATTATGACGACGCCGAGTCCCGACCAGACTGCATAAGCAACACCGACAGGGATGGATTTCAGAACCAGAGAAAGAAAATAAAATGCGATGCCATAACCGATTATGACAACGGCGGAAGGGGCAAGCTTAGTAAAGCCCTCGCTAGATTTTAATGCGGATGTTGCGATTACTTCGCCAACTATTGCGATAACAAGAAAAAGCCAGCCTTTCATGATATATCTCCCAATTTGTGTAGGGCTTATTATGCACGCTTAAAAATAATAAAAGCAGACTTGACCTGATAGTTTGGCTGTGAGCAATTATGTGCTTAGTGCATCTAACGCCGGAGTTAAGCCGCCGCGCGTAGCGCGGTCGGCTTGAACGAATTGTTAGACATCATTTACCAACTGACTTGATGATCTCGCCTTTCACAAAGCGAATAAATTCTTCCAAGTGATCTGCGCGTGAGGCCAAGTGATCTTCTTTTTGTCCCAGATAAGCTTGCTTAGCTTCAAGTAAGACGGGCTGATACTGGGCAGGTAGGCGTTTTATTGCCCAGTCGGCAGCGACATCCTTCGGCGCGATTTTGCCGGTTATTGCGCTGTACCAAATGCGGGACAACGTAAGCACTACATTTCGCTCATCGCCGGCCCAGTCGGGCTGCGAGTTCCATAGCTTCAAGGTTTCCCTCAGCGCCTCGAATAGATCCTGTTCAGGAACCGGGTCAAAGAATTCCTCCGCTGCCGGACCTACCAAGGCAACGCTATGTTCTCTTGCTTTTGTAAGCAGGATAGCTAGATCAATGTCGATCATGGCTGGCTCGAAGATACCCGCAAGAATGTCATTGCGCTGCCATTCTCCAAATTGCAGCTCGCGCTTAGCCGGATAACGCCACGGGATGATGTCGTCATGCACGACAAGGGTGACTTCTATAGCGCGGAGCGTCTCGCTCTCGCC
AGGGAAAGCCGAAGCCTCCATAAGATCATTGAGCAATGCTCGCCGCGTCGTTTCATCAAGCTTTACGGCCACAGTAACCAACAAATCAATATCGCTGTATGGCTTCAGGCCGCCATCCACTGCGGAGCCGTACAAATGCACGGCCAGCAACGTTGATTCCAGATGGCGCTCAATGACGCTTAGCACCTCTGATAGTTGGTTCGAAATTTCGATGGTCACCGCTTCCCTCATGATGTCTAACGGGCGAGGTAAGCCGACCGCAGAATGCGGGTCGGCTTGACCGAAATGTTAGAACCAGAAGCCAAAACGGATAACTTGAATTTGGCGACGGGCGCTAACCGTGAAAAAACGCTGCGCCACCGAGGCGGCACAGCACTGCAAAAACGATAGCTGCTTGCGCTTGCTACGCAAGGGCTAGAGGCCAAAAAGACTGAAAACCTGCGCAGCCCATGCAGGCGAAGCCCGGAAAAAAGGCAAAACAGGCACTGAATAACGCCTGAAAAGCTAAATGCCGTTTGAATAAACATGAGCTAAATAAAGCTGGGTTTAAGTGGTGCTAACGTTGGACGTAACGAGAGCCGGAGCGCAGCGGAGGGAACCAAAATGCGCAGCATTTTGGCGTCCCGTTGACGGAATGGTTAGCCGTTTCGACGCGCATAAACGGAGTGGGTGTACGGAATTACAGCTTGAATGGTTTCGGTTGAGACAAGCTCGAATTCTGTTTCGTTGAGCATTGGGAAGAAGGCGTCACCCTCGAAGGTTTGATGTACCTCAGATAGAAACACGCCGTGGGCGTGAGGTAGTGCCAGAGTGTATATCTCAGCTCCGCCCGCGACGTAGAGTTCATTGCCGAGTTCGGATGCCAAAGCGATAGCGTGCGACAGCGTTGAAACAACTACGCAGCCAGTGGCGCGGTAGTTAGCTTGGCGTGAGATTACCAATGTGTGACGGTTCGGTAGAGGCTTGCCGATAGACTCAAAGGTCTTTCGCCCCATGACAACGACTTTTCCCTCAGTGAGTCTGCGAAAAATCTTCTGCTCACCCGGAATTTTCCAGGGGATATTAGGACCATTGCCAATAACCCGATTGGCTCCCATCGCAGCAACGAGATAAATGCGTACTGATTCCGAGTTCATATGGCTAACTTTGTTTTAGGGCGACTGCCCTGCTGCGTAACATCGTTGCTGCTCCATAACATCAAACATCGACCCACGGCGTAACGCGCTTGCTGCTTGGATGCCCGAGGCATAGACTGTACAAAAAAACAGTCATAACAAGCCATGAAAACCGCCACTGCGCCGTTACCACCGCTGCGTTCGGTCAAGGTTCTGGACCAGTTGCGTGAGCGCATACGCTACTTGCATTACAGCTTACCAACCGAACAGGCTTATGTCCACTGGGTTCGTGCCTTCATCCGTTTCCACGGTGTGCGTCACCCGGCAACCTTGGGCAGCAGCGAAGTCGAGGCATTTCTGTCCTGGCTGGCGAACGAGCGCAAGGTTTCGGTCTCCACGCATCGTCAGGCATTGGCGGCCTTGCTGTTCTTCTACGGCAAGGTGCTGTGCACGGATCTGCCCTGGCTTCAGGAGATCGGAAGACCTCGGCCGTCGCGGCGCTTGCCGGTGGTGCTGACCCCGGATGAAGTGGTTCGCATCCTCGGTTTTCTGGAAGGCGAGCATCGTTTGTTCGCCCAGCTTCTGTATGGAACGGGCATGCGGATCAGTGAGGGTTTGCAACTGCGGGTCAAGGATCTGGATTTCGATCACGGCACGATCATCGTGCGGGAGGGCAAGGGCTCCAAGGATCGGGCCTTGATGTTACCCGAGAGCTTGGCACCCAGCCTGCGCGAGCAGCTGTCGCGTGCACGGGCATGGTGGCTGAAGGACCAGGCCGAGGGCCGCAGCGGCGTTGCGCTTCCCGACGCCCTTGAGCGGAAGTATCCGCGCGCCGGGCATTCCTGGCCGTGGTTCTGGGTTTTTGCGCAGCACACGCATTCGACC
GATCCACGGAGCGGTGTCGTGCGTCGCCATCACATGTATGACCAGACCTTTCAGCGCGCCTTCAAACGTGCCGTAGAACAAGCAGGCATCACGAAGCCCGCCACACCGCACACCCTCCGCCACTCGTTCGCGACGGCCTTGCTCCGCAG +>93_length=1443_depth=2.74x +TGTACTGACCCCAAAAAGTTGGACAGTTAAACACGAGGCATATAGGTCTGATTCCGATATTCAATTGGAGTCAGACCTTTTAATTTCAGGCTAATTCTTCTGCTGTTGTAGTATTCAATATATTCCGTAACAGCATCCTTCAGTTCGCTTATATTACTGAACTCATCAAGATAAAAACACTCCGACTTTAAGGTTCCAAAGAAACACTCCACCACAGCATTATCCAGACAATTGCCTTTTCTGGACATGCTTTGTTTAATACCATGTTCTTTAAGGATATTTTGATATCTTCTCATACGATACTGCCATCCCTGGTCAGAGTGCAGAACAGGATGCTCGTGAGGATTAAGCTTTTTGAATGCCTGATCGAGCATATTCTCAACCATGTTCATCACTGGTCTTTCCGAAAGGCTGTAAGAAATAACTTCGTTGTTGAAGAGATCTATTACTGGAGACAAATACAGCTTGCGCCCATTGACTGCAAATTCAGTAACATCGGTAACCCACTTCTCGTTTGGCCGCGTAGCCTTGAAATCTCTTTGGAGAACATTAGGGGCGGTTTGCCCTACCTCTCCTCTGTAAGAGCGGTATCGCTTGACCTTAATCGCTGCTTTAAGTGAGAGGGTTCCCATCAGGCGCTGAACAGCTTTATGGTTAATCTGTTTCCCTTCTCGATGAAGAGACAGCGTTACCCTACGGTATCCGTATCGGCCTCTATTCTCGTGATAAATCTCACTAATACGCTTTTTAACGTCCGCATACTTGTCAGGCTTGCTGAGAGCCTTTAGATGATAATAAAACGTACTGCGCGGTATCTCCGCAGCCCTGAGAAGCTCATCAAGAGGATAAAACTGCCTTAGCTCGTTGAGTACTTTCACTTTTTCGTGGGATGAGCTAAGGCTTTCAGCTTTTTTAGATACATAAGCCGCGTTTCAAGAAATCGAACTTGCCTTTCAAGATCCTCAATGCGTCGGTCTTTTGACAGCTCCAATGCTGATGCCGCTTTTTCTGGATCAACTGATATTGCAATGTTTCTTTTGGTGCCAATCTTGAGCGCGCGTAAACCAGCTTCTCCGCGCTCTTCATAGACCTTCAGCCACCTGGCTACAGAACCACTACCAGCAAGCATAAAGTGAGCAGCAGCCTGATTAAGGGACATGTGCTGCTCGATCACAGCTTTCACGACCTTAATACGCAACTCTGGATCAGCACTAACGCCTTTAGGTTTGGGAATTAAACCTTTTTCTCCATGTTTTTCATAGAGGGCAACCCATGTCCTGACCTGGGTTCGGGGGACACCAAAACGTGCCGAGATGATCCTGTAACCATCATCAGTTGTGAAGTAGTGATTCACGACTTCAAGGCGCTTTTCAAAAGGGTATTTTGGCTTTGACATATTAGGGGCTATTCCATTTCATCGTCCAACAAAATGGGTGCAGTACA diff -r 000000000000 -r c917ef6807d7 test-data/concatenated_mob_typer_reports.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concatenated_mob_typer_reports.tsv Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,6 @@ +plasmid_1068 2 19016 50.54690786705932 - - - - - - - - Non-mobilizable CP021680 0.00705245 1068 - - - - - - - - - - - - - - - +plasmid_1550 5 106133 
51.586217293396025 IncFIIA,IncFII,IncFIA 000136__AP014877_00014,000121__CP024805,000094__NZ_CP015070_00117 MOBF NC_017627_00068 MPF_F 08-5333_00200,NC_008460_00107,NC_014615_00033,NC_010488_00021,NC_018966_00040,NC_017639_00100,NC_007675_00027,NC_013437_00116,NC_017639_00094,NC_019094_00090,NC_010409_00124,NC_022651_00077 - - Conjugative CP011064 0.00476862 1550 - - - - - - - - - - - - - - - +plasmid_2719 2 11117 55.03283259872268 - - - - - - - - Non-mobilizable JQ739157 0.0496454 2719 - - - - - - - - - - - - - - - +plasmid_739 1 8070 57.707558859975215 - - - - - - - - Non-mobilizable KR259131 0.0401734 739 - - - - - - - - - - - - - - - +plasmid_839 1 2100 47.142857142857146 Inc13 000058__CP000643_00001 - - - - - - Non-mobilizable CP008719 0.00186568 839 - - - - - - - - - - - - - - - +plasmid_novel_0 1 3684 46.19978284473398 ColRNAI_rep_cluster_1987,rep_cluster_2050 000019__CP000651_00005,001473__CP024541 - - - - - - Non-mobilizable KU302807 0.0699663 197 - - - - - - - - - - - - - - - diff -r 000000000000 -r c917ef6807d7 test-data/outdir/mob_typer_record.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outdir/mob_typer_record.tsv Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,2 @@ +file_id num_contigs total_length gc rep_type(s) rep_type_accession(s) relaxase_type(s) relaxase_type_accession(s) mpf_type mpf_type_accession(s) orit_type(s) orit_accession(s) PredictedMobility mash_nearest_neighbor mash_neighbor_distance mash_neighbor_cluster NCBI-HR-rank NCBI-HR-Name LitRepHRPlasmClass LitPredDBHRRank LitPredDBHRRankSciName LitRepHRRankInPubs LitRepHRNameInPubs LitMeanTransferRate LitClosestRefAcc LitClosestRefDonorStrain LitClosestRefRecipientStrain LitClosestRefTransferRate LitClosestConjugTemp LitPMIDs LitPMIDsNumber +plasmid_2719 2 11117 55.03283259872268 - - - - - - - - Non-mobilizable JQ739157 0.0496454 2719 - - - - - - - - - - - - - - - diff -r 000000000000 -r c917ef6807d7 test-data/outdir/plasmid.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/test-data/outdir/plasmid.fasta Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,4 @@ +>53_length=9674_depth=5.42x +GTTTACTCATACCTAGATTCTACGTCAGTACTTCAAAAAGCATAATCAAAGCCTTGATAAATATGCATTCCTTCGAAATTCAGCTTTCACCCATTGGGTGAAAGAAAAGTGCTCAACATAAAATACTCGAAGAAGATTTAGGCATAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAATTAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATGTCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTGATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAACCAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTGAGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTGGCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTTCGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGTTGGTCGATACCGCCTGGACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGGTGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTATGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGCTGGTTGCGGCGCAACACAGCCTGACTTTCGCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCCCCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGATCAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCGCGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCGCCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAATCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAGGATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACCCAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGCGGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTGAGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATGCCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGGACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGACATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGC
CGACGCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGTTGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGCCGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGCGGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAGGCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAGCGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTGGCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCGGGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCGCTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGTGGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGGGTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGATAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGCGCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCACATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGCAGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGGTCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGCGCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAATTCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCGAGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGTGGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTCGCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTGGCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGACCGCGAGCGGGTATAGGAAGTATAAACCACCTTTTTGCTCCTCATCCGAAGTATCTTACCTGAAATTCCCTCACTCGTTTACCGCTCAAGCCCCAATTTTAACTGCCGGTCCAGCCTAAACCGCTCTAATAAGGTTCGATTTGGCGGTAAAATCTCTAGCCTGATAGCTCGAGAGATACAAACTGCCCCACCGCCCCGTTTAAAAGTTGGCAGTGTTGAGCAGTGTTGGATTTGGGGTCGTCAGTCAAAGAGACGACTCTGTGATGGATCGAACAGGCTGGGAGTCAGTGGCGGCGCTCGTTCTGGTGGCAGCTCACGCTGCTTGGCGGCATTCGCCTTGGCTGTTTTCTGTTTCAGATGCTTGAGAATCTGCTCAATGACCTTCGGATCTTCGATGCTGGCAATCACTTTGACGTGACCGCCGCAGTGTTCGCAGACTTCAATATCAATATTGAA
GACTCGCTTGAGGCGTTGCATCCAGGTCATGGCGCGGTGGCGCTCTGCAGGACTCTTGTCACGCCAGTTAGTATCGAGACCTTCCGATTTGTCGGGCTTCTTGCCCCGCTTGGCGGGTGTTACTTGAACTCGGTGTTTGCTGTTCGGTGCAAAGACGCCGTGGAAGCGTGTGAGGTTGACTCGCGGCTTAGGTACCAACGCAGCGAGTTTGGCGATGAAGTCCAGCGGCTCGAAGATCACATGGGTGGTGCCATTGCGGTACGGAGTTTTGAGCTCGTAACGCACCTGCCCATTGGCGGTTAATGCCAGACGTTTTTCTGAAACCGCTGGCCGACTAATGTAGCGACACAAGCGCTCAAGCTTATCCCGCTGATGCGCTTCGGCCATCACACCGGCGTGTAGCGAGAAACCAGCATGGTTGGCTACTCGACTGCTTGAGTCGGCTTTATCCTCACGCCCTGGCAAGGTTTGCAGGGTGAAGACTTTGCGCCCTTGCTGGGGGCCGACGGCAATGCGATACGTAACCGAAGCACCATGTAATTGAGTCAGCGTATCGTCTTCGCCCTCTTCCAGTGTCAACCACGTATTCTCGGCATCACGCTCCAAAATCCCACGCTTTTCCATGCAGCGAGCGATGCGATGGCTGAGGGTGTGAGCGAGCGTATTCAGCTCATCGTAAGTGGGTGCCTTGACACGATGGAAGCGTTGCTTGCCATAGTCATCTTCGGCATAGACACCATCGAGAAACAGCATGTGGTAGTGGACATTGAGATTTAGCGCGGAGCCAAAGCGTTGGATAAGAGTCACTGAGCCAGTTTGTGCAGAGGCTTTGGTGTAACCGGCTTTTTTGATCAGATGAGTTGAGAGTGTACGATAGACGATACTCAAGACCTGGCCCATCAGCTGGGGATGGCGAGCCAGCAAAAAGCGTAGCTGGAAAGGAAAGCTGAGCACCCACTGGCGAATGGGCTCCTTGGGGAAGACTTCGTCTATCAGCAGCGCCGCACTCTCGGCCATCCGGCGGGCACCGCAGCTAGGGCAAAAGCCGCGTCGTTTACAGCTGAAGGCGACCAGACGCTCGTGATGACAATCCTCGCAGCGAACCCGCATGAAACCATACTCCAGACGGCCACATTGGAGGAGGTCGTTGAATTCTTGTTGGATGTAGCGAGGCAGGTGTTGACCTTGGGCTTCGAGTGAGGCTTTGAAGGCTGGGTAGTGCTGCTCAACCAGCTGGTAGAGCAGCGTCTGGTCGGGTTGGTGGCGTTCGTAACCGTTTGTTTGAGTGGGCGATTGACTCGCCGTGGCGTTCCTTGCCAGCGACATGGGTATCCTCCGCTGATACTGTGGTTATGTACAGTATCAGCGGCTTGCGTTCAGACGTCCAGTCTGGCCCTAGACATCGCTAAATGCTTAACCCGCAATAGCCCTCACGAGTTGTTATCAGCCACTACCGGTTGAGCGAGAAGGTTTTGGGTTCAGGGTGCTATTGCTCCACCAATCACAATACTGAAGCCCCAACTGTTATCAGTTGGGGCTTTTTCTTGTCTGTTTGCGGCGGTTGCGTTTTATCGGTAGTCGTCGAGCTCTGCACCATCCCACATAAGAGCTTAACGGTGCGATCTTCAACGCCATCACACAAAACTTTCTTTTTCACGCACAGTCAACTTATTGGATGTTTTATTAACAACCCAAAAGGAGATATTTAGCGGGCGGCCGGAAGGTGAATGCTAGGCATGATCTAACCCTCGGTCTCTGGCGTCGCGACTGCGAAATTTCGCGAGGGTTTCCGAGAAGGTGATTGCGCTTCGCAGATCTCCAGGCGCGTGGGTGCGGACGTAGTCAGCGCCATTGCCGATCGCGTGAAGTTCCGCCGCAAGGCTCGCTGGACCCAGATCCTTTACAGGAAGGCCAACGGTGGCGCCCAAGAAGGATTTCCGCGACACCGAGACCAATAGCGGAAGCCCCAACGCCGACTTCAGCTTTTGAAGGTTCGACAG
CACGTGCAGCGATGTTTCCGGTGCGGGGCTCAAGAAAAATCCCATCCCCGGATCGAGGATGAGCCGGTCGGCAGCGACCCCGCTCCGTCGCAAGGCGGAAACCCGCGCCTCGAAGAACCGCACAATCTCGTCGAGCGCGTCTTCGGGTCGAAGGTGACCGGTGCGGGTGGCGATGCCATCCCGCTGCGCTGAGTGCATAACCACCAGCCTGCAGTCCGCCTCAGCAATATCGGGATAGAGCGCAGGGTCAGGAAATCCTTGGATATCGTTCAGGTAGCCCACGCCGCGCTTGAGCGCATAGCGCTGGGTTTCCGGTTGGAAGCTGTCGATTGAAACACGGTGCATCTGATCGGACAGGGCGTCTAAGAGCGGCGCAATACGTCTGATCTCATCGGCCGGCGATACAGGCCTCGCGTCCGGATGGCTGGCGGCCGGTCCGACATCCACGACGTCTGATCCGACTCGCAGCATTTCGATCGCCGCGGTGACAGCGCCGGCGGGGTCTAGCCGCCGGCTCTCATCGAAGAAGGAGTCCTCGGTGAGATTCAGAATGCCGAACACCGTCACCATGGCGTCGGCCTCCGCAGCGACTTCCACGATGGGGATCGGGCGAGCAAAAAGGCAGCAATTATGAGCCCCATACCTACAAAGCCCCACGCATCAAGCTTTTGCCCATGAAGCAACCAGGCAATGGCTGTAATTATGACGACGCCGAGTCCCGACCAGACTGCATAAGCAACACCGACAGGGATGGATTTCAGAACCAGAGAAAGAAAATAAAATGCGATGCCATAACCGATTATGACAACGGCGGAAGGGGCAAGCTTAGTAAAGCCCTCGCTAGATTTTAATGCGGATGTTGCGATTACTTCGCCAACTATTGCGATAACAAGAAAAAGCCAGCCTTTCATGATATATCTCCCAATTTGTGTAGGGCTTATTATGCACGCTTAAAAATAATAAAAGCAGACTTGACCTGATAGTTTGGCTGTGAGCAATTATGTGCTTAGTGCATCTAACGCCGGAGTTAAGCCGCCGCGCGTAGCGCGGTCGGCTTGAACGAATTGTTAGACATCATTTACCAACTGACTTGATGATCTCGCCTTTCACAAAGCGAATAAATTCTTCCAAGTGATCTGCGCGTGAGGCCAAGTGATCTTCTTTTTGTCCCAGATAAGCTTGCTTAGCTTCAAGTAAGACGGGCTGATACTGGGCAGGTAGGCGTTTTATTGCCCAGTCGGCAGCGACATCCTTCGGCGCGATTTTGCCGGTTATTGCGCTGTACCAAATGCGGGACAACGTAAGCACTACATTTCGCTCATCGCCGGCCCAGTCGGGCTGCGAGTTCCATAGCTTCAAGGTTTCCCTCAGCGCCTCGAATAGATCCTGTTCAGGAACCGGGTCAAAGAATTCCTCCGCTGCCGGACCTACCAAGGCAACGCTATGTTCTCTTGCTTTTGTAAGCAGGATAGCTAGATCAATGTCGATCATGGCTGGCTCGAAGATACCCGCAAGAATGTCATTGCGCTGCCATTCTCCAAATTGCAGCTCGCGCTTAGCCGGATAACGCCACGGGATGATGTCGTCATGCACGACAAGGGTGACTTCTATAGCGCGGAGCGTCTCGCTCTCGCCAGGGAAAGCCGAAGCCTCCATAAGATCATTGAGCAATGCTCGCCGCGTCGTTTCATCAAGCTTTACGGCCACAGTAACCAACAAATCAATATCGCTGTATGGCTTCAGGCCGCCATCCACTGCGGAGCCGTACAAATGCACGGCCAGCAACGTTGATTCCAGATGGCGCTCAATGACGCTTAGCACCTCTGATAGTTGGTTCGAAATTTCGATGGTCACCGCTTCCCTCATGATGTCTAACGGGCGAGGTAAGCCGACCGCAGAATGCGGGTCGGCTTGACCGAAATGTTAGAACCAGAAGCCAAAACGGATAACTTGAATTTGGCGACGGGCGCTAACCGTGAAAAAACGCTGCGCCACCGAGG
CGGCACAGCACTGCAAAAACGATAGCTGCTTGCGCTTGCTACGCAAGGGCTAGAGGCCAAAAAGACTGAAAACCTGCGCAGCCCATGCAGGCGAAGCCCGGAAAAAAGGCAAAACAGGCACTGAATAACGCCTGAAAAGCTAAATGCCGTTTGAATAAACATGAGCTAAATAAAGCTGGGTTTAAGTGGTGCTAACGTTGGACGTAACGAGAGCCGGAGCGCAGCGGAGGGAACCAAAATGCGCAGCATTTTGGCGTCCCGTTGACGGAATGGTTAGCCGTTTCGACGCGCATAAACGGAGTGGGTGTACGGAATTACAGCTTGAATGGTTTCGGTTGAGACAAGCTCGAATTCTGTTTCGTTGAGCATTGGGAAGAAGGCGTCACCCTCGAAGGTTTGATGTACCTCAGATAGAAACACGCCGTGGGCGTGAGGTAGTGCCAGAGTGTATATCTCAGCTCCGCCCGCGACGTAGAGTTCATTGCCGAGTTCGGATGCCAAAGCGATAGCGTGCGACAGCGTTGAAACAACTACGCAGCCAGTGGCGCGGTAGTTAGCTTGGCGTGAGATTACCAATGTGTGACGGTTCGGTAGAGGCTTGCCGATAGACTCAAAGGTCTTTCGCCCCATGACAACGACTTTTCCCTCAGTGAGTCTGCGAAAAATCTTCTGCTCACCCGGAATTTTCCAGGGGATATTAGGACCATTGCCAATAACCCGATTGGCTCCCATCGCAGCAACGAGATAAATGCGTACTGATTCCGAGTTCATATGGCTAACTTTGTTTTAGGGCGACTGCCCTGCTGCGTAACATCGTTGCTGCTCCATAACATCAAACATCGACCCACGGCGTAACGCGCTTGCTGCTTGGATGCCCGAGGCATAGACTGTACAAAAAAACAGTCATAACAAGCCATGAAAACCGCCACTGCGCCGTTACCACCGCTGCGTTCGGTCAAGGTTCTGGACCAGTTGCGTGAGCGCATACGCTACTTGCATTACAGCTTACCAACCGAACAGGCTTATGTCCACTGGGTTCGTGCCTTCATCCGTTTCCACGGTGTGCGTCACCCGGCAACCTTGGGCAGCAGCGAAGTCGAGGCATTTCTGTCCTGGCTGGCGAACGAGCGCAAGGTTTCGGTCTCCACGCATCGTCAGGCATTGGCGGCCTTGCTGTTCTTCTACGGCAAGGTGCTGTGCACGGATCTGCCCTGGCTTCAGGAGATCGGAAGACCTCGGCCGTCGCGGCGCTTGCCGGTGGTGCTGACCCCGGATGAAGTGGTTCGCATCCTCGGTTTTCTGGAAGGCGAGCATCGTTTGTTCGCCCAGCTTCTGTATGGAACGGGCATGCGGATCAGTGAGGGTTTGCAACTGCGGGTCAAGGATCTGGATTTCGATCACGGCACGATCATCGTGCGGGAGGGCAAGGGCTCCAAGGATCGGGCCTTGATGTTACCCGAGAGCTTGGCACCCAGCCTGCGCGAGCAGCTGTCGCGTGCACGGGCATGGTGGCTGAAGGACCAGGCCGAGGGCCGCAGCGGCGTTGCGCTTCCCGACGCCCTTGAGCGGAAGTATCCGCGCGCCGGGCATTCCTGGCCGTGGTTCTGGGTTTTTGCGCAGCACACGCATTCGACCGATCCACGGAGCGGTGTCGTGCGTCGCCATCACATGTATGACCAGACCTTTCAGCGCGCCTTCAAACGTGCCGTAGAACAAGCAGGCATCACGAAGCCCGCCACACCGCACACCCTCCGCCACTCGTTCGCGACGGCCTTGCTCCGCAG +>93_length=1443_depth=2.74x 
+TGTACTGACCCCAAAAAGTTGGACAGTTAAACACGAGGCATATAGGTCTGATTCCGATATTCAATTGGAGTCAGACCTTTTAATTTCAGGCTAATTCTTCTGCTGTTGTAGTATTCAATATATTCCGTAACAGCATCCTTCAGTTCGCTTATATTACTGAACTCATCAAGATAAAAACACTCCGACTTTAAGGTTCCAAAGAAACACTCCACCACAGCATTATCCAGACAATTGCCTTTTCTGGACATGCTTTGTTTAATACCATGTTCTTTAAGGATATTTTGATATCTTCTCATACGATACTGCCATCCCTGGTCAGAGTGCAGAACAGGATGCTCGTGAGGATTAAGCTTTTTGAATGCCTGATCGAGCATATTCTCAACCATGTTCATCACTGGTCTTTCCGAAAGGCTGTAAGAAATAACTTCGTTGTTGAAGAGATCTATTACTGGAGACAAATACAGCTTGCGCCCATTGACTGCAAATTCAGTAACATCGGTAACCCACTTCTCGTTTGGCCGCGTAGCCTTGAAATCTCTTTGGAGAACATTAGGGGCGGTTTGCCCTACCTCTCCTCTGTAAGAGCGGTATCGCTTGACCTTAATCGCTGCTTTAAGTGAGAGGGTTCCCATCAGGCGCTGAACAGCTTTATGGTTAATCTGTTTCCCTTCTCGATGAAGAGACAGCGTTACCCTACGGTATCCGTATCGGCCTCTATTCTCGTGATAAATCTCACTAATACGCTTTTTAACGTCCGCATACTTGTCAGGCTTGCTGAGAGCCTTTAGATGATAATAAAACGTACTGCGCGGTATCTCCGCAGCCCTGAGAAGCTCATCAAGAGGATAAAACTGCCTTAGCTCGTTGAGTACTTTCACTTTTTCGTGGGATGAGCTAAGGCTTTCAGCTTTTTTAGATACATAAGCCGCGTTTCAAGAAATCGAACTTGCCTTTCAAGATCCTCAATGCGTCGGTCTTTTGACAGCTCCAATGCTGATGCCGCTTTTTCTGGATCAACTGATATTGCAATGTTTCTTTTGGTGCCAATCTTGAGCGCGCGTAAACCAGCTTCTCCGCGCTCTTCATAGACCTTCAGCCACCTGGCTACAGAACCACTACCAGCAAGCATAAAGTGAGCAGCAGCCTGATTAAGGGACATGTGCTGCTCGATCACAGCTTTCACGACCTTAATACGCAACTCTGGATCAGCACTAACGCCTTTAGGTTTGGGAATTAAACCTTTTTCTCCATGTTTTTCATAGAGGGCAACCCATGTCCTGACCTGGGTTCGGGGGACACCAAAACGTGCCGAGATGATCCTGTAACCATCATCAGTTGTGAAGTAGTGATTCACGACTTCAAGGCGCTTTTCAAAAGGGTATTTTGGCTTTGACATATTAGGGGCTATTCCATTTCATCGTCCAACAAAATGGGTGCAGTACA diff -r 000000000000 -r c917ef6807d7 test-data/outdir/reference_plasmid.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outdir/reference_plasmid.fasta Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,96 @@ +>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence +AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG +ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG +GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT 
+CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA +ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT +CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC +GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA +TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC +CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT +TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT +AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA +ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG +ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC +CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA +AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA +AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT +GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA +GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA +GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA +AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG +TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG +TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA +CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG +TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT +ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG +GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT +ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG +CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA +TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT +TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG 
+TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG +ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC +CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT +GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG +AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG +GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT +CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG +ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG +TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA +TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC +GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC +CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT +CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG +CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG +CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA +TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG +GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC +CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC +GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG +AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG +CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG +ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC +ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC +GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT +TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC +CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC +GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG 
+GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG +CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG +GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG +GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG +CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT +GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG +GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA +TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC +GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC +ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC +AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG +TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC +GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT +TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG +AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT +GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC +GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG +GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA +CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT +AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA +ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC +TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA +AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT +AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA +TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG +AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC +GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC 
+TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC +TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA +ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC +TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA +AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG +ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA +CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA +GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA +AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT +CAAACCGTATAGGATTGCGTACTTCAGGACATTG + diff -r 000000000000 -r c917ef6807d7 test-data/outdir/reference_plasmid.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outdir/reference_plasmid.gbk Tue Nov 12 22:47:36 2019 -0500 @@ -0,0 +1,214 @@ +LOCUS JQ739157 6544 bp DNA circular BCT 26-JUL-2016 +DEFINITION Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete + sequence. +ACCESSION JQ739157 +VERSION JQ739157.2 +KEYWORDS . +SOURCE Acinetobacter pittii + ORGANISM Acinetobacter pittii + Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; + Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii + complex. +REFERENCE 1 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. and Yu,Y. + TITLE Epidemiological characteristics and genetic structure of blaNDM-1 + in non-baumannii Acinetobacter spp. in China + JOURNAL J. Antimicrob. Chemother. 67 (9), 2114-2122 (2012) + PUBMED 22604448 +REFERENCE 2 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. and Yu,Y. + TITLE Direct Submission + JOURNAL Submitted (05-MAR-2012) Department of Infectious Diseases, Sir Run + Run Shaw Hospital, Affiliated with School of Medicine, Zhejiang + University, 3 East Qingchun Road, Hangzhou, Zhejiang 310016, China +REFERENCE 3 (bases 1 to 6544) + AUTHORS Fu,Y., Du,X., Ji,J., Chen,Y., Jiang,Y. 
and Yu,Y. + TITLE Direct Submission + JOURNAL Submitted (12-MAY-2014) Department of Infectious Diseases, Sir Run + Run Shaw Hospital, Affiliated with School of Medicine, Zhejiang + University, 3 East Qingchun Road, Hangzhou, Zhejiang 310016, China + REMARK Sequence update by submitter +COMMENT On May 12, 2014 this sequence version replaced JQ739157.1. +FEATURES Location/Qualifiers + source 1..6544 + /organism="Acinetobacter pittii" + /mol_type="genomic DNA" + /strain="ABCA95" + /db_xref="taxon:48296" + /plasmid="pABCA95" + gene 375..1031 + /gene="aphA6" + misc_feature 375..1031 + /gene="aphA6" + /note="similar to aphA6; aminoglycoside + 3'-phosphotransferase; 5' end of gene is missing" + mobile_element 1129..2215 + /mobile_element_type="insertion sequence:ISAba125" + repeat_region 1138..1154 + /note="IRL" + /rpt_type=inverted + CDS 1186..2211 + /codon_start=1 + /transl_table=11 + /product="Transposase" + /protein_id="AFK73837.1" + /translation="MEYIKLSYHHLNFEDRTALMLESRKEGFSARKFAELIKRHPSTI + YRELKRNSINDVYQARYASDNTFARRRRGHRKLKIDSILWKFIVEAIRCLWSPQQIAK + RLKTFPDLDQTMNVSHTTIYSTIRALPKGELKKDLLSCLRHENKKRKANGEPKKDSIL + QDIKTIHERPAEVQERKIPGHWEADLIKGKDNKSSIATLIERNTRLCILATLPDAKAE + SVRKALTEALKYLPAELRKTLTYDRGREMSEHKILEEDLGIDVYFCDPHSPWQKGTCE + NMNGLIRQYLPKGIDLNQADQHYLNQVAMSLNTRPRKALDWLTPLEKFAQLVDYHMAF + ETVAPHV" + repeat_region 2190..2206 + /note="IRL" + /rpt_type=inverted + gene 2312..3124 + /gene="blaNDM-1" + CDS 2312..3124 + /gene="blaNDM-1" + /codon_start=1 + /transl_table=11 + /product="metallo-beta-lactamase" + /protein_id="AFK73838.1" + /translation="MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQ + RFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLVVDTAWTDDQTAQIL + NWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGMVAAQHS + LTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSL + GNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR" + gene 3128..3493 + /gene="bleo" + CDS 3128..3493 + /gene="bleo" + /note="involved in resistance to bleomycin" + /codon_start=1 + 
/transl_table=11 + /product="bleomycin resistance protein" + /protein_id="AFK73839.1" + /translation="MADHVTPNLPSRDFDVTEAFYAKLGFATSWKDRGWMILQRGGLQ + LEFFPYPDLDPATSSFGCCLRLDDLDAMVALVNAAGAEEKSTGWPRFKAPQLEASGLR + IGYLIDPDCTLVRLIQNPD" + gene 3498..4133 + /gene="trpF" + misc_feature 3498..4133 + /gene="trpF" + /note="similar to TrpF; phosphoribosyl anthranilate + isomerase; 3' end not present" + gene complement(4150..4830) + /gene="tat" + misc_feature complement(4150..4830) + /gene="tat" + /note="similar to TAT; twin-arginine translocation pathway + signal sequence protein; 5' end of gene is missing." +ORIGIN + 1 aagcttatag cagtgtcaca gatgcgaaaa agcaattaag tgcatatttt gagttttata + 61 atttgaaacg acctcattcg agtctagaca aaatgacacc aaatgagttt tactatgatc + 121 agctacccca acaaaacaag gtggcttaac tagagcggaa tatcacttat aaatacgctt + 181 ttagttgttc aaacaagtgg gaccacctct ctcgcatttg cggggttttt aatgctgaat + 241 aaaaggaaaa cttgatggaa ttgcccaata ttattcaaca atttattgga aacagcgttt + 301 tagagccaaa taaaattggt cagtcgccat cggatgttta ttcttttaat cgaaataatg + 361 aaactttttt tcttaagcga tctagcactt tatatacaga gaccacatac agtgtctctc + 421 gcgaagcgaa aatgttgagt tggctctctg agaaattaaa ggtgcctgaa ctcatcatga + 481 cttttcagga tgagcagttt gaattaatga tcactaaagc gatcaatgca aaaccaattt + 541 cagcgctttt tttaacagac caagaattgc ttgctatcta taaggaggca ctcaatctgt + 601 taaattcagt tgctattatt gattgtccat ttatttcaaa cattgatcat cggttaaaag + 661 agtcaaaatt ttttattgat aaccaactcc ttgacgatat agatcaagat gattttgacg + 721 ctgaattatg gggagaccat agaacttacc taagtctatg gaatgagtta actgagactc + 781 gtgttgaaga aagattggtt ttttctcatg gcgatatcac ggatagtaat atttttatag + 841 ataaattcaa tgaaatttac tttttagatc ttggccgtgc tgggttagct gatgaatttg + 901 tagatatatc ctttgttgaa cgttgcctaa gagaggatgc ctcggaggaa actgctaaaa + 961 tatttttaaa gcatttaaaa aatgatagac ctgacaaaag gaattatttt ttaaaacttg + 1021 atgaattgaa ttgattctaa gcattatcta aaaatactta attgtctttt aacgtcgcta + 1081 aattttaaat aaataagtga agagtgttag tggagccact gatttaaagt tggcagagta + 1141 aaacttgaag 
tgcgacataa accacctaat taatttaaag ggtttatgga gtatataaaa + 1201 ttgtcatacc atcatcttaa ctttgaagat cgtactgcat taatgcttga gtcaagaaaa + 1261 gaaggctttt cagccagaaa atttgctgaa ctcattaaaa gacatcctag tacgatctat + 1321 cgtgagctta aaagaaatag catcaatgac gtttatcaag ctcgatatgc ttctgataac + 1381 accttcgcta gacgtagacg tggtcacaga aaactcaaaa tcgattcaat cctctggaaa + 1441 tttattgttg aagcgatccg ttgtttatgg tctcctcagc aaatagcaaa gcgtttaaag + 1501 acatttcctg atttggatca aacaatgaat gtaagccata caacgattta ttcaacgata + 1561 cgagcattac caaagggtga gttgaaaaaa gacttattat cctgtctacg tcatgaaaat + 1621 aaaaagcgaa aagctaacgg tgaacctaaa aaagattcta tattacagga tattaaaact + 1681 attcatgagc gcccagccga agttcaagaa agaaaaatac cgggtcattg ggaagctgat + 1741 ttaattaaag gtaaagacaa taaaagttcg atagcaacac ttattgaacg aaatacacgg + 1801 ctctgtatct tggcaacatt acctgatgca aaggcagaat cagtgcgcaa ggctttaact + 1861 gaagctctga aatatttacc tgcagaactg cgtaaaacgt tgacctatga ccgtggacgt + 1921 gagatgtcag aacataaaat actcgaagaa gatttaggca tagatgtata tttctgtgac + 1981 ccacattcac cctggcaaaa aggcacatgc gaaaatatga atggtttaat taggcaatat + 2041 ttacctaaag ggattgattt aaatcaggca gatcagcatt atttaaatca agttgccatg + 2101 tcactgaata ctcgtcctag aaaggcgtta gattggctta caccattaga gaaatttgct + 2161 cagcttgttg attatcatat ggcttttgaa actgtcgcac ctcatgtttg aattcgcccc + 2221 atatttttgc tacagtgaac caaattaaga tcatctattt actaggcctc gcatttgcgg + 2281 ggtttttaat gctgaataaa aggaaaactt gatggaattg cccaatatta tgcacccggt + 2341 cgcgaagctg agcaccgcat tagccgctgc attgatgctg agcgggtgca tgcccggtga + 2401 aatccgcccg acgattggcc agcaaatgga aactggcgac caacggtttg gcgatctggt + 2461 tttccgccag ctcgcaccga atgtctggca gcacacttcc tatctcgaca tgccgggttt + 2521 cggggcagtc gcttccaacg gtttgatcgt cagggatggc ggccgcgtgc tggtggtcga + 2581 taccgcctgg accgatgacc agaccgccca gatcctcaac tggatcaagc aggagatcaa + 2641 cctgccggtc gcgctggcgg tggtgactca cgcgcatcag gacaagatgg gcggtatgga + 2701 cgcgctgcat gcggcgggga ttgcgactta tgccaatgcg ttgtcgaacc agcttgcccc + 2761 gcaagagggg atggttgcgg cgcaacacag 
cctgactttc gccgccaatg gctgggtcga + 2821 accagcaacc gcgcccaact ttggcccgct caaggtattt taccccggcc ccggccacac + 2881 cagtgacaat atcaccgttg ggatcgacgg caccgacatc gcttttggtg gctgcctgat + 2941 caaggacagc aaggccaagt cgctcggcaa tctcggtgat gccgacactg agcactacgc + 3001 cgcgtcagcg cgcgcgtttg gtgcggcgtt ccccaaggcc agcatgatcg tgatgagcca + 3061 ttccgccccc gatagccgcg ccgcaatcac tcatacggcc cgcatggccg acaagctgcg + 3121 ctgagccatg gctgaccacg tcacccccaa tctgccatcg cgcgatttcg atgtgacaga + 3181 ggcgttttat gcgaagctgg gctttgcgac gagttggaag gatcgcggct ggatgatcct + 3241 gcagcgcggc ggtttgcagc tcgaattctt cccctatcct gacctcgacc cagctacgag + 3301 ctcgttcggc tgttgcctgc ggttggatga tctcgatgcc atggtggcat tggtgaacgc + 3361 ggcgggagcc gaggaaaaaa gcaccggctg gccgcgcttc aaagctccgc aactggaggc + 3421 gagcggcctg aggatcggct acctgatcga tcccgactgc acgctggtgc ggctgatcca + 3481 gaaccccgac tgaccgcatg cccgcgaaaa tcaagatttg cgggatcagc acacccgagg + 3541 cgctcgatgc gaccatcgcg gcgcgggcgg actatgccgg gttggtgttc tatccagcgt + 3601 cgccccgtgc ggttacgtcg aatgtcgcgg gcgctttgac atcgcgcgca gctggccaga + 3661 tcgccatggt cggtttgttc gtcgatgcgg atgatgctgt catcgccgac gcactggtgg + 3721 cagccaagct gaacgcgctg cagctgcacg gttcggaatc gcccgaacgc gtggcccagt + 3781 tgcgcgcgcg gtttggcaag ccggtgtgga aggcgctgcc cgtcgccagc gccagcgatg + 3841 tcgcacgcgc cgcagcctat gccggggcgg cggacttgat cttgttcgac gccaagaccc + 3901 ccaaaggcgc gctgcccggc ggcatggggt tggcgttcga ctggtcgctg ctggccggat + 3961 atcgcggtgc cttgccgtgg gggctggcag gcgggctaaa tccgacgaat gttgccgagg + 4021 cgattgcgcg caccggagcg ccgctggtcg atacctccag cggcgtcgaa agcgcgccgg + 4081 gcgtcaagga taccgacaag attaccaatt tcgcctttgc ggtgcgcttg gcctaaatcg + 4141 cgtcgatcaa taggcgtcgt tcagcgcaaa gatcggcttg cgggtgcgcc actgccctcg + 4201 ggtgaagtcg ggaaaatcta acgtgcgatt gccctcagca atcgattgtt ccgacagagg + 4261 cgtgatcgcg ctccaggcca gcgcgtcgta aatgtcgatt ggcatcgggg ccttggcctt + 4321 cagcgcctcg acaaaagcgt ggatcacgaa ccagtccatc ccgccatgcc cggcccctgc + 4381 cgccagatcg gcgtagcgtt tccatagcgg gtgatcgtat ttcgcaaacc 
agccctcggc + 4441 aggctcccag cggtgcggct gtgggctctt gccctccaga tagatcgact tgttgacgtc + 4501 catccacagc ccctcggtgc cttgcacccg aaagccgaga gaataggggc gcggcagcga + 4561 ggtgtcgtgg cacagcatga tcgtttcacc attagtgcag ccgatcatgg tgttgaccac + 4621 atcacccagt gcgaatttca cctcggcgtt gggatgatcg gcagagccgt tcttgacgac + 4681 ataatcatgc agcccgcgcg ccttacagcc gaagccgcca gcgcccgctt cgcccggcaa + 4741 cgcgaccttc agggtgcggg tctgcggcgg gtagcacacg ccggcatcgg cgcagccctg + 4801 gtacttcacg gtcagggtgg tcgcgctcgc gccggccgcg ggcgtgccgg tgagggtgcc + 4861 gagcaattcc ttgcggtagg tttcgacgtc gccgaagaat tcgtcgcggt aggccttgcc + 4921 cttcggcagc gccatggtcg cgccggtgaa ggcggcatcg gccttgaccg aggtgcggtg + 4981 ccggtacagg taatagccgt cggcgatccg ccagcgcacc tcgatgcggt ccggcgcggt + 5041 ggcctgcgcg gacaggacga agacctcgtc gaccggcggc agttcgaagt cctgggcgac + 5101 ggccgaggtc gcgggcagcg caagcagcag ggcgagcccg gccagccagc ggcgcaggcg + 5161 gatcgtggat gcggtcattg gctcagttta ccggtcggct ctcggcggcc agccattgca + 5221 ggtattcggg caggccggac gcggcttcga ccgcgagcag ctccgggagt tcgtagggat + 5281 atagtatttt attaaattct tatgggaaat gacgaatgtt aaattatctt aagagcttta + 5341 ataatatcaa tacttatttg attttatcga taattctgct gttaatcata atatctctag + 5401 attatttcta aactgaatga atgtttataa tgagtgattc atattgctat tgaaatcgcc + 5461 ttctcacttt gaaagaaggc gaggatgagg gacttttatg ttgaattatc attttaaaaa + 5521 tgccttataa aagaagctta atgtgttttc ttatataggt ttaaacataa ttgttgtata + 5581 tcttaaatcc aattgatctt aaaattttcc tttatttttt gttatgagtg cgagaaaatt + 5641 gtcaaaaagg tcaatcagac tgggcgttaa tttgttttgc atactttttc ctatatcgaa + 5701 ttaaagtcat ataactaaca ccataatctt tagctatttg agtgaaaggg tatgaatcgt + 5761 ccttattttt aagggtatga attaactctt ttagtttttc ttctgtaatc gcaggcgatc + 5821 ttcccttgta tttacctttc ttttttttag ctaatttaat tccctctgct tgattctcac + 5881 taataatacc cctttcaagt tcagctacag cgcctaatac atggagttga aacttatcga + 5941 acttgtcatc tgaattgggg gtaaagttca ggttattttt gacaatatga acagacactc + 6001 cttttttatt tagcttttga acaatggtta caaggtcaat caagctacgt gccaatctaa + 6061 aaacatcatg 
agcgtacaca atgtccccac tacggacata atcgaacatt tcctgaagtg + 6121 cagggcgttt ggcagtcttt ccgctaaaat gatcaataaa agttttatct agctcaaagg + 6181 gtagatcatg gagctgtctt tcagggtttt ggtctttagt ggatacacgg atatacccca + 6241 ctctttgaaa gggtgtgttt ttaatttgat cttcaatatc taaattttct ttttccataa + 6301 ccagtataac aaaattagat aacctcaatg ttatatcaca ttagattaac aaaacaaccc + 6361 tattgttata gggtttttag ggtgtattat tatataacaa tagggtatac cctattgtta + 6421 tatatcttca ggtataagga aaaataacga tgattaattt taatgatcta agcgaatctg + 6481 aattattaag gattgcacag actggcatat caaaccgtat aggattgcgt acttcaggac + 6541 attg +// +