changeset 0:7ff266aecf01 draft default tip

planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
author tduigou
date Wed, 11 Jun 2025 09:42:24 +0000
parents
children
files save_to_db.py seq_to_db.xml test-data/HC_Amp_ccdB.gb test-data/p4_Kt-L7Ae-Weiss.gb test-data/p6_Kozak-ATG.gb test-data/p6_Nt-IgKLsequence.gb test-data/p7_L7Ae-Weiss.gb test-data/test-JSON_arg.json test-data/test-JSON_arg_block.json test-data/test_raport.txt verification.py
diffstat 11 files changed, 879 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/save_to_db.py	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,266 @@
+import subprocess
+import argparse
+import time
+import os
+import socket
+import re
+import json
+from sqlalchemy import create_engine, inspect
+from sqlalchemy.engine.url import make_url
+from sqlalchemy.sql import text
+from sqlalchemy.exc import OperationalError
+
+
+def resolve_parameters(user_params: dict, json_params: dict, keys: list):
+    """Pick each parameter from the user input, else from the JSON config.
+
+    For every name in ``keys`` the value found in ``user_params`` wins when
+    it is present and truthy (so ``None`` and ``""`` count as "not given").
+    Otherwise the ``JSON_<name>`` entry of ``json_params`` is used, which is
+    ``None`` when that entry is missing as well.
+
+    Returns a dict with exactly one entry per name in ``keys``.
+    """
+    return {
+        name: user_params.get(name) or json_params.get(f"JSON_{name}")
+        for name in keys
+    }
+
+
+def fix_db_uri(uri):
+    """Return ``uri`` with every ``__at__`` placeholder turned back into ``@``."""
+    at_placeholder = "__at__"
+    return "@".join(uri.split(at_placeholder))
+
+
+def is_port_in_use(uri):
+    """Check whether something accepts TCP connections on a host/port.
+
+    ``uri`` is normally a SQLAlchemy database URI; its host and port are
+    probed. A bare port number (``int``) is accepted as well and is probed
+    on ``localhost``. A URI that carries no host or no port falls back to
+    ``localhost`` / ``5432`` so the socket call never receives ``None``.
+
+    Returns ``True`` when the connection attempt succeeds within 2 seconds,
+    ``False`` otherwise.
+    """
+    if isinstance(uri, int):
+        # Plain port number, e.g. is_port_in_use(5432).
+        host, port = "localhost", uri
+    else:
+        url = make_url(uri)
+        host = url.host or "localhost"
+        port = url.port or 5432
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(2)
+        # connect_ex returns 0 on success instead of raising.
+        return s.connect_ex((host, port)) == 0
+
+def extract_db_name(uri):
+    """Return the database component of a SQLAlchemy connection URI."""
+    return make_url(uri).database
+
+
+# Helper that brings the database up when it lives in a Docker container.
+# NOTE: main() does not call it (the call there is commented out).
+def start_postgres_container(db_name):
+    """Make sure a PostgreSQL container named ``db_name`` is running.
+
+    Three cases are handled in order:
+
+    1. a running container matches the name: nothing to do;
+    2. a stopped container matches the name: it is started;
+    3. no container matches: a new ``postgres`` container is created,
+       published on host port 5432, or 5433 when 5432 is already taken.
+
+    Docker failures are reported on stdout and are not re-raised.
+
+    Raises:
+        ValueError: if ``db_name`` is empty (an empty name filter would
+            match every container).
+    """
+    container_name = db_name
+    if not container_name:
+        raise ValueError("A container name is required to start PostgreSQL.")
+
+    def matching_container_ids(*ps_options):
+        """Return the IDs printed by ``docker ps`` for the name filter."""
+        # Argument list instead of a shell string: the container name is
+        # never interpreted by a shell. Docker's name filter also matches
+        # partial names, exactly as the original command did.
+        listing = subprocess.run(
+            ["docker", "ps", *ps_options, "-q", "-f", f"name={container_name}"],
+            capture_output=True, text=True
+        )
+        return listing.stdout.strip()
+
+    # Check if container is already running
+    if matching_container_ids():
+        print(f"Container '{container_name}' is already running.")
+        return
+
+    # Check if container exists (stopped)
+    if matching_container_ids("-a"):
+        print(f"Starting existing container '{container_name}'...")
+        try:
+            subprocess.run(["docker", "start", container_name], check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"Failed to start Docker container: {e}")
+            return
+        print(f"PostgreSQL Docker container '{container_name}' activated.")
+        return
+
+    # If container does not exist, create and start a new one.
+    # The probe is given a URI because is_port_in_use() parses its argument
+    # as one.
+    port = 5432 if not is_port_in_use("postgresql://localhost:5432") else 5433
+    # NOTE(review): the fallback below is a hard-coded credential; prefer
+    # always providing POSTGRES_PASSWORD through the environment.
+    postgres_password = os.getenv("POSTGRES_PASSWORD", "RK17")
+
+    start_command = [
+        "docker", "run", "--name", container_name,
+        "-e", f"POSTGRES_PASSWORD={postgres_password}",
+        "-p", f"{port}:5432",
+        "-d", "postgres"
+    ]
+
+    try:
+        subprocess.run(start_command, check=True)
+        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to start Docker container: {e}")
+
+
+def wait_for_db(uri, timeout=60):
+    """Poll the database until a connection succeeds or ``timeout`` elapses.
+
+    A connection is attempted; after each ``OperationalError`` the function
+    sleeps 2 seconds and tries again while the deadline has not passed.
+
+    Args:
+        uri: SQLAlchemy connection URI.
+        timeout: maximum time to keep trying, in seconds.
+
+    Raises:
+        TimeoutError: if no connection could be opened before the deadline.
+    """
+    engine = create_engine(uri)
+    # Monotonic clock: the deadline is not affected by wall-clock changes.
+    deadline = time.monotonic() + timeout
+    try:
+        while time.monotonic() < deadline:
+            try:
+                with engine.connect():
+                    print("Connected to database.")
+                    return
+            except OperationalError:
+                print("Database not ready, retrying...")
+                time.sleep(2)
+    finally:
+        # This engine only exists for the probe; release its connections.
+        engine.dispose()
+    raise TimeoutError("Database connection failed after timeout.")
+
+
+def _parse_file_name_mapping(file_name_mapping):
+    """Turn ``"path:name,path:name"`` into ``{basename(path): name without extension}``."""
+    mapping = {}
+    for entry in file_name_mapping.split(","):
+        # Split on the first colon only, so a colon inside the fragment name
+        # does not break the unpacking.
+        path, separator, fragment_name = entry.partition(":")
+        if not separator:
+            raise ValueError(f"Invalid file_name_mapping entry '{entry}': expected 'path:name'.")
+        mapping[os.path.basename(path)] = os.path.splitext(fragment_name)[0]
+    return mapping
+
+
+def _split_genbank(gb_file):
+    """Read a GenBank file and return ``(annotation_text, sequence_text)``, cut at the ORIGIN line."""
+    with open(gb_file, "r") as f:
+        content = f.read()
+
+    origin_match = re.search(r"^ORIGIN.*$", content, flags=re.MULTILINE)
+    if not origin_match:
+        raise ValueError(f"ORIGIN section not found in file: {gb_file}")
+
+    origin_start = origin_match.start()
+    return content[:origin_start].strip(), content[origin_start:].strip()
+
+
+def push_gb_annotations(gb_files, sequence_column, annotation_column, db_uri, table_name, fragment_column_name, output, file_name_mapping):
+    """Push GenBank file content into the database if the fragment is not already present.
+
+    Each file is split at its ``ORIGIN`` line: the text before it goes to
+    ``annotation_column``, the text from ``ORIGIN`` onwards goes to
+    ``sequence_column``, and the fragment name goes to
+    ``fragment_column_name``. All rows are inserted in a single transaction,
+    so any error leaves the table untouched. The names of the inserted
+    fragments are then written to ``output``, one per line.
+
+    Args:
+        gb_files: paths of the GenBank files to store.
+        sequence_column: column receiving the sequence part.
+        annotation_column: column receiving the annotation part.
+        db_uri: SQLAlchemy connection URI (``__at__`` placeholders allowed).
+        table_name: target table.
+        fragment_column_name: column holding the fragment names.
+        output: path of the text report to write.
+        file_name_mapping: ``"path:name,path:name"`` string giving the
+            fragment name of each file; a file is looked up by its base
+            name and the name is stored without its extension.
+
+    Raises:
+        ValueError: malformed mapping entry, unknown column, file without a
+            mapping entry, or file without an ``ORIGIN`` section.
+        RuntimeError: a fragment already exists in the table or is given
+            more than once in this call.
+    """
+    db_uri = fix_db_uri(db_uri)
+    engine = create_engine(db_uri)
+    inserted_fragments = []
+
+    try:
+        file_name_mapping_dict = _parse_file_name_mapping(file_name_mapping)
+
+        with engine.begin() as connection:
+            inspector = inspect(engine)
+            columns = {col['name'] for col in inspector.get_columns(table_name)}
+
+            if fragment_column_name not in columns:
+                raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.")
+            for column in (annotation_column, sequence_column):
+                if column not in columns:
+                    raise ValueError(f"Column '{column}' not found in table '{table_name}'.")
+
+            # Identifiers cannot be bound as parameters; quote them with the
+            # dialect's own rules instead of pasting raw user input into SQL.
+            quote = engine.dialect.identifier_preparer.quote
+            table_sql = quote(table_name)
+            fragment_sql = quote(fragment_column_name)
+            annotation_sql = quote(annotation_column)
+            sequence_sql = quote(sequence_column)
+
+            # Get existing fragments
+            all_rows = connection.execute(text(f"SELECT {fragment_sql} FROM {table_sql}")).fetchall()
+            existing_fragments = {row[0] for row in all_rows}
+
+            insert_rows = []
+
+            for gb_file in gb_files:
+                # The mapping is keyed by base file name, not by full path.
+                real_file_name = os.path.basename(gb_file)
+                fragment_name = file_name_mapping_dict.get(real_file_name)
+
+                print(f"Processing file: {real_file_name}({fragment_name})")
+
+                if not fragment_name:
+                    raise ValueError(f"Fragment name not found for file '{real_file_name}' in file_name_mapping.")
+
+                # If the fragment is already in the DB, raise an error and stop the process
+                if fragment_name in existing_fragments:
+                    raise RuntimeError(f"Fatal Error: Fragment '{fragment_name}' already exists in DB. Stopping the process.")
+
+                # Same rule inside the batch: never insert one name twice.
+                if fragment_name in inserted_fragments:
+                    raise RuntimeError(f"Fatal Error: Fragment '{fragment_name}' is provided more than once. Stopping the process.")
+
+                annotation_text, sequence_text = _split_genbank(gb_file)
+
+                insert_rows.append({
+                    "fragment": fragment_name,
+                    "annotation": annotation_text,
+                    "sequence": sequence_text,
+                })
+                inserted_fragments.append(fragment_name)
+
+            # Fixed bind names keep the statement valid whatever the column
+            # names look like.
+            insert_stmt = text(
+                f"INSERT INTO {table_sql} ({fragment_sql}, {annotation_sql}, {sequence_sql}) "
+                "VALUES (:fragment, :annotation, :sequence)"
+            )
+            for values in insert_rows:
+                connection.execute(insert_stmt, values)
+
+            print(f"Inserted {len(insert_rows)} fragments.")
+
+            # Written inside the transaction: if the report cannot be
+            # written, the inserts are rolled back as well.
+            with open(output, "w") as log_file:
+                for frag in inserted_fragments:
+                    log_file.write(f"{frag}\n")
+            print(f"Fragment names written to '{output}'.")
+
+    except Exception as e:
+        print(f"Error during GB file insertion: {e}")
+        raise
+    finally:
+        engine.dispose()
+
+
+def main():
+    """Command-line entry point: store GenBank files in a database table.
+
+    The database parameters (table, columns, URI) are taken from the command
+    line; any of them left empty is read from the ``JSON_<name>`` entry of
+    the optional ``--json_conf`` file. When that file sets ``"execution"``
+    to false, the tool prints a notice and returns without touching the
+    database. The run aborts with a usage error when a parameter can be
+    resolved from neither source.
+    """
+    # Needed only to recognise an unparseable URI when redacting it below.
+    from sqlalchemy.exc import ArgumentError
+
+    parser = argparse.ArgumentParser(description="Save GenBank file annotations and sequences into a database table.")
+    parser.add_argument("--input", required=True, help="Input gb files")
+    # The DB parameters may come from the JSON config, so they are optional here.
+    parser.add_argument("--sequence_column", required=False, default="", help="DB column contains sequence for ganbank file")
+    parser.add_argument("--annotation_column", required=False, default="", help="DB column contains head for ganbank file")
+    parser.add_argument("--db_uri", required=False, default="", help="Database URI connection string")
+    parser.add_argument("--table", required=False, default="", help="Table name in the database")
+    parser.add_argument("--fragment_column", required=False, default="", help="Fragment column name in the database")
+    parser.add_argument("--output", required=True, help="Text report")
+    parser.add_argument("--file_name_mapping", required=True, help="real fragments names")
+    parser.add_argument("--json_conf", required=False, help="JSON config file with DB parameters")
+    args = parser.parse_args()
+
+    # Load JSON config if provided. The flag may be omitted (None), empty,
+    # or the literal string 'None'; all three mean "no config file".
+    json_config = {}
+    if args.json_conf and args.json_conf != 'None':
+        with open(args.json_conf, "r") as f:
+            json_config = json.load(f)
+        # Accept the JSON boolean false as well as "false"/"False" strings.
+        if str(json_config.get("execution")).strip().lower() == "false":
+            print("Execution was blocked by config (execution = false)")
+            return
+
+    # Prefer user input; fallback to JSON_ values if not provided
+    user_params = {
+        "table": args.table,
+        "sequence_column": args.sequence_column,
+        "annotation_column": args.annotation_column,
+        "fragment_column": args.fragment_column,
+        "db_uri": args.db_uri
+    }
+
+    keys = ["table", "sequence_column", "annotation_column", "fragment_column", "db_uri"]
+    resolved = resolve_parameters(user_params, json_config, keys)
+
+    # Fail early with a readable message instead of crashing on a None value.
+    missing = [key for key in keys if not resolved[key]]
+    if missing:
+        parser.error("Missing parameter(s), given neither on the command line nor in the JSON config: " + ", ".join(missing))
+
+    # Unpack resolved parameters
+    table = resolved["table"]
+    sequence_column = resolved["sequence_column"]
+    annotation_column = resolved["annotation_column"]
+    fragment_column = resolved["fragment_column"]
+    db_uri = fix_db_uri(resolved["db_uri"])
+
+    # Prepare gb files
+    gb_file_list = [f.strip() for f in args.input.split(",") if f.strip()]
+
+    # Never print the password that may be embedded in the URI.
+    try:
+        safe_uri = make_url(db_uri).render_as_string(hide_password=True)
+    except ArgumentError:
+        safe_uri = "<invalid database URI>"
+
+    # Wait for the DB; each attempt already polls inside wait_for_db().
+    # db_name = extract_db_name(db_uri)
+    # start_postgres_container(db_name)
+    max_retries = 3
+    for attempt in range(1, max_retries + 1):
+        try:
+            wait_for_db(db_uri)
+            break  # Success
+        except Exception:
+            if attempt == max_retries:
+                print(f"Attempt {attempt} failed: Could not connect to database at {safe_uri}.")
+                raise
+            time.sleep(2)
+
+    # Push annotations
+    push_gb_annotations(
+        gb_file_list,
+        sequence_column,
+        annotation_column,
+        db_uri,
+        table,
+        fragment_column,
+        args.output,
+        args.file_name_mapping
+    )
+
+
+# Run the command-line entry point only when the file is executed as a script.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seq_to_db.xml	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,148 @@
+<tool id="seq_to_db" name="Save Sequence Data In DB" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
+    <description>Save fragment's sequence in an accessible database and import it from .gb files</description>
+    <macros>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@TOOL_VERSION@">0.1.0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.2.3">pandas</requirement>
+        <requirement type="package" version="2.0.40">sqlalchemy</requirement>
+        <requirement type="package" version="2.9.9">psycopg2</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
+        #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files])
+        python '$__tool_directory__/save_to_db.py'
+            --input '$genbank_file_paths' 
+            --sequence_column '$sequence_column'
+            --annotation_column '$annotation_column'
+            --db_uri '$db_uri' 
+            --table '$table' 
+            --fragment_column '$fragment_column'
+            --output '$output'
+            --file_name_mapping '$file_name_mapping'
+            --json_conf '$json_conf'
+    ]]></command>
+    <inputs>
+        <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/>
+        <param name="table" type="text" label="DB Table Name" optional="true" help="It can be extracted from JSON file -key:'JSON_table'-" />
+        <param name="sequence_column" type="text" label="DB Column Contains Sequence For ganbank File" optional="true" help="It can be extracted from JSON file -key:'JSON_sequence_column'-" />
+        <param name="annotation_column" type="text" label="DB Column Contains Annotation For Ganbank File" optional="true" help="It can be extracted from JSON file -key:'JSON_annotation_column'-" />
+        <param name="fragment_column" type="text" label="DB IDs Column Name" optional="true" help="It can be extracted from JSON file -key:'JSON_fragment_column'-" />
+        <param name="db_uri" type="text" label="DB Connection URI" help="postgresql://container_name:password@host:port/path/to/database (It can be extracted from JSON file -key:'JSON_db_uri'-)" optional="true" />
+        <section name='adv' title='Advance' expanded='false'>
+            <param name="json_conf" type="data" format='json' label="DB config as a json file" help="JSON file specifying the database URI, table name and the column names for annotation and sequence data" optional="true" />
+        </section>
+    </inputs>   
+    <outputs>
+        <data name="output" format="txt" label="saving report" />
+    </outputs>
+    <tests>
+    <!-- Only one saving test can be executed per run: once a test has saved the fragments, any later saving test fails because the fragments are already present in the DB (execute ../get_db_data/testMock.py to regenerate the initial DB). -->
+        <!-- Test blocking the tool from the JSON config. It is meant to be commented out because it should fail: it only validates that the tool can be blocked from the JSON file. -->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="adv|json_conf" value="test-JSON_arg_block.json" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--test DB config in the tool -->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="table" value="sample" />
+            <param name="sequence_column" value="sequence" />
+            <param name="annotation_column" value="annotation" />
+            <param name="fragment_column" value="fragment" />
+            <param name="db_uri" value="postgresql://postgres:RK17@localhost:5432/test_fragments_db" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test the DB config read from JSON. It is meant to be commented out because the save can be done only one time: afterwards the fragments are in the DB and a new save returns a failure. To run this test, comment out the test above (one saving test per run). -->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="adv|json_conf" value="test-JSON_arg.json" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Save Sequence Data In DB
+========================
+
+Saves GenBank (.gb) files into an accessible database that is reached through a connection URI.
+
+**Parameters**:
+---------------
+* **GenBank File(s)**: List of GenBank files.
+* **DB Table Name**: Name of the target table in the PostgreSQL database.
+* **DB Column Contains Sequence For ganbank File**: Column storing sequence data, expected to start with "ORIGIN". 
+* **DB Column Contains Annotation For Ganbank File**: Column containing annotation data; it stores everything that precedes "ORIGIN" in the .gb file.
+* **DB IDs Column Name**: Column holding the unique fragment IDs.
+* **DB Connection URI**: URI used to connect to the database (e.g., postgresql://postgres:pass@localhost:5432/test_fragments_db).
+* **DB config as a json file**: JSON file contains the DB configuration:
+    - "JSON_table": will be the key to the table name.
+    - "JSON_sequence_column":  will be the key to the sequence column.
+    - "JSON_annotation_column":  will be the key to the annotation column.
+    - "JSON_fragment_column": will be the key to the fragment column.
+    - "JSON_db_uri": will be the key to the URI.
+    - "execution": the key used to execute or block the tool during a workflow ("true" or "false"; the value "false" blocks the run).
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{seq_to_db,
+                author = {Ramiz Khaled},
+                title = {{seq_to_db}},
+                url = {https://github.com/brsynth/},
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/HC_Amp_ccdB.gb	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,109 @@
+LOCUS       Exported                2721 bp ds-DNA     circular SYN 14-SEP-2017
+DEFINITION  synthetic circular DNA.
+ACCESSION   .
+VERSION     .
+KEYWORDS    HC_Amp_ccdB(1-25).gb
+SOURCE      synthetic DNA construct
+  ORGANISM  synthetic DNA construct
+REFERENCE   1  (bases 1 to 2721)
+  AUTHORS   Trial User
+  TITLE     Direct Submission
+  JOURNAL   Exported Sep 14, 2017 from SnapGene Viewer 4.0.2
+            http://www.snapgene.com
+FEATURES             Location/Qualifiers
+     source          1..2721
+                     /organism="synthetic DNA construct"
+                     /mol_type="other DNA"
+     misc_feature    complement(73..78)
+                     /label=BsmBI
+     misc_feature    129..462
+                     /label=*ccdB promoter*
+     exon            463..768
+                     /label=ccdB
+                     /note="ccdB"
+     terminator      809..880
+                     /note="rrnB T1 terminator
+                     transcription terminator T1 from the E. coli rrnB gene"
+     terminator      896..923
+                     /note="T7Te terminator
+                     phage T7 early transcription terminator"
+     misc_feature    930..942
+                     /label=BioBrick suffix
+                     /note="universal suffix for all parts"
+     misc_feature    943..948
+                     /label=BsmBI
+     terminator      1027..1056
+                     /note="T3Te terminator
+                     phage T3 early transcription terminator"
+     rep_origin      1078..1665
+                     /direction=RIGHT
+                     /label=ori
+                     /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of 
+                     replication"
+     terminator      1677..1704
+                     /note="T7Te terminator
+                     phage T7 early transcription terminator"
+     CDS             complement(1728..2588)
+                     /codon_start=1
+                     /gene="bla"
+                     /product="beta-lactamase"
+                     /note="AmpR
+                     confers resistance to ampicillin, carbenicillin, and 
+                     related antibiotics"
+                     /translation="MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI
+                     ELDLNSGKILESLRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS
+                     PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLATIGGPKELTAFLHNMGDHVTRLDRW
+                     EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGSLLRSA
+                     LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS
+                     LIKHW"
+     promoter        complement(2589..2691)
+                     /note="cat promoter
+                     promoter of the E. coli cat gene"
+ORIGIN
+        1 ctttctgcta tggaggtcag gtatgattta aatggtcagt attgagcgat atctagagaa
+       61 ttcgtcatag gagagacgca atacgcaaac cgcctctccc cgcgcgttgg ccgattcatt
+      121 aatgcaggga tccggcttac taaaagccag ataacagtat gcgtatttgc gcgctgattt
+      181 ttgcggtata agaatatata ctgatatgta tacccgaagt atgtcaaaaa gaggtatgct
+      241 atgaagcagc gtattacagt gacagttgac agcgacagct atcagttgct caaggcatat
+      301 atgatgtcaa tatctccggt ctggtaagca caaccatgca gaatgaagcc cgtcgtctgc
+      361 gtgccgaacg ctggaaagcg gaaaatcagg aagggatggc tgaggtcgcc cggtttattg
+      421 aaatgaacgg ctcttttgct gacgagaaca ggggctggtg aaatgcagtt taaggtttac
+      481 acctataaaa gagagagccg ttatcgtctg tttgtggatg tacagagtga tattattgac
+      541 acgcccgggc gacggatggt gatccccctg gccagtgcac gtctgctgtc agataaagtc
+      601 ccccgtgaac tttacccggt ggtgcatatc ggggatgaaa gctggcgcat gatgaccacc
+      661 gatatggcca gtgtgccggt ctccgttatc ggggaagaag tggctgatct cagccaccgc
+      721 gaaaatgaca tcaaaaacgc cattaacctg atgttctggg gaatataagc tgatagtgct
+      781 agtgtagatc gctactagag ccaggcatca aataaaacga aaggctcagt cgaaagactg
+      841 ggcctttcgt tttatctgtt gtttgtcggt gaacgctctc tactagagtc acactggctc
+      901 accttcgggt gggcctttct gcgtttatat actagtagcg gccgtctcaa cgataacgaa
+      961 ttcaagcttg atatcattca ggacgagcct cagactccag cgtaactgga ctgcaatcaa
+     1021 ctcactggct caccttcacg ggtgggcctt tcttcggtag aaaatcaaag gatcttcttg
+     1081 agatcctttt tttctgcgcg taatctgctg cttgcaaaca aaaaaaccac cgctaccagc
+     1141 ggtggtttgt ttgccggatc aagagctacc aactcttttt ccgaggtaac tggcttcagc
+     1201 agagcgcaga taccaaatac tgttcttcta gtgtagccgt agttaggcca ccacttcaag
+     1261 aactctgtag caccgcctac atacctcgct ctgctaatcc tgttaccagt ggctgctgcc
+     1321 agtggcgata agtcgtgtct taccgggttg gactcaagac gatagttacc ggataaggcg
+     1381 cagcggtcgg gctgaacggg gggttcgtgc acacagccca gcttggagcg aacgacctac
+     1441 accgaactga gatacctaca gcgtgagcta tgagaaagcg ccacgcttcc cgaagggaga
+     1501 aaggcggaca ggtatccggt aagcggcagg gtcggaacag gagagcgcac gagggagctt
+     1561 ccagggggaa acgcctggta tctttatagt cctgtcgggt ttcgccacct ctgacttgag
+     1621 catcgatttt tgtgatgctc gtcagggggg cggagcctat ggaaaaacgc cagcaacgca
+     1681 gaaaggccca cccgaaggtg agccaggtga ttacatttgg gccctcatta ccaatgctta
+     1741 atcagtgagg cacctatctc agcgatctgt ctatttcgtt catccatagt tgcctgactc
+     1801 cccgtcgtgt agataactac gatgcgggag ggcttaccat ctggccccag tgctgcaatg
+     1861 ataccgcgag aaccacgctc accggctcca gatttatcag caataaacca gccagccggg
+     1921 agggccgagc gcagaagtga tcctgcaact ttatccgcct ccatccagtc tattaattgt
+     1981 tgccgggaag ctagagtaag tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt
+     2041 gctacaggca tcgtggtgtc acgctcgtcg tttggtatgg cttcattcag ctccggttcc
+     2101 caacgatcaa ggcgagttac atgatccccc atgttgtgca aaaaagcggt tagctccttc
+     2161 ggtcctccga tcgttgccag aagtaagttg gccgcagtgt tatcactcat ggttatggca
+     2221 gcactgcata attctcttac tgtcatgcca tccgtgagat gcttttctgt gactggtgag
+     2281 tactcaacca agtcattctg agaatagtgt atgcggcgac cgagttgctc ttgcccggcg
+     2341 tcaatacggg ataataccgc gccacatagc agaactttaa aagtgctcat cattggaaaa
+     2401 cgttcttcgg ggcgtaaact ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa
+     2461 cccactcgtg cacccaactg atcttcagca tcttttactt tcaccagcgt ttctgggtga
+     2521 gcaaaaacag gaaggcaaaa tgccgcaaaa aagggaataa gggcgacacg gaaatgttga
+     2581 atactcattt tagcttcctt agctcctgaa aatctcgata actcaaaaaa tacgcccggt
+     2641 agtgatctta tttcattatg gtgaaagttg gaacctctta cgtgccgatc aagtcaaaag
+     2701 cctccggtcg gaggcttttg a
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p4_Kt-L7Ae-Weiss.gb	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,72 @@
+LOCUS       .                       1845 bp    DNA              UNK 01-JAN-1980
+DEFINITION  .
+ACCESSION   <unknown id>
+VERSION     <unknown id>
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     terminator      392..419
+                     /note="T7Te terminator"
+                     /note="phage T7 early transcription terminator"
+     rep_origin      complement(431..1018)
+                     /direction=LEFT
+                     /note="ori"
+                     /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+                     replication"
+     terminator      1040..1069
+                     /note="T3Te terminator"
+                     /note="phage T3 early transcription terminator"
+     misc_feature    1143..1148
+                     /note="BsmBI"
+     source          5..1149
+                     /source="Exported"
+     misc_feature    1156..1182
+                     /note="Kt - L7Ae"
+     source          1154..1185
+                     /source="Exported"
+     misc_feature    complement(1191..1196)
+                     /note="BsmBI"
+     terminator      1263..1294
+                     /note="tonB terminator"
+                     /note="bidirectional E. coli tonB-P14 transcription
+                     terminator"
+     promoter        1295..1397
+                     /note="cat promoter"
+                     /note="promoter of the E. coli cat gene"
+     source          1190..1845
+                     /source="Exported"
+ORIGIN
+        1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+       61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+      121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+      181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+      241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+      301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+      361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+      421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+      481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+      541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+      601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+      661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+      721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+      781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+      841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+      901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+      961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+     1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+     1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+     1141 tacgtctcgg gacaaggatc cgtgatcgga aacgtgagat ccagttccgc gagacgaaga
+     1201 cgaattctct agatatcgct caatactgac catttaaatc atacctgacc tccatagcag
+     1261 aaagtcaaaa gcctccgacc ggaggctttt gacttgatcg gcacgtaaga ggttccaact
+     1321 ttcaccataa tgaaataaga tcactaccgg gcgtattttt tgagttatcg agattttcag
+     1381 gagctaagga agctaaaatg agccatattc aacgggaaac gtcttgctcg aggccgcgat
+     1441 taaattccaa catggatgct gatttatatg ggtataaatg ggctcgcgat aatgtcgggc
+     1501 aatcaggtgc gacaatctat cgattgtatg ggaagcccga tgcgccagag ttgtttctga
+     1561 aacatggcaa aggtagcgtt gccaatgatg ttacagatga gatggtcagg ctaaactggc
+     1621 tgacggaatt tatgcctctt ccgaccatca agcattttat ccgtactcct gatgatgcat
+     1681 ggttactcac cactgcgatc ccagggaaaa cagcattcca ggtattagaa gaatatcctg
+     1741 attcaggtga aaatattgtt gatgcgctgg cagtgttcct gcgccggttg cattcgattc
+     1801 ctgtttgtaa ttgtcctttt aacggcgatc gcgtatttcg tctcg
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p6_Kozak-ATG.gb	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,76 @@
+LOCUS       .                       1856 bp    DNA              UNK 01-JAN-1980
+DEFINITION  .
+ACCESSION   <unknown id>
+VERSION     <unknown id>
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     terminator      392..419
+                     /note="T7Te terminator"
+                     /note="phage T7 early transcription terminator"
+     rep_origin      complement(431..1018)
+                     /direction=LEFT
+                     /note="ori"
+                     /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+                     replication"
+     terminator      1040..1069
+                     /note="T3Te terminator"
+                     /note="phage T3 early transcription terminator"
+     misc_feature    1143..1148
+                     /note="BsmBI"
+     source          5..1149
+                     /source="Exported"
+     misc_feature    1184..1195
+                     /note="Kozak"
+     CDS             1192..1194
+                     /codon_start=1
+                     /note="ATG"
+                     /translation="M"
+     source          1154..1196
+                     /source="Exported"
+     misc_feature    complement(1202..1207)
+                     /note="BsmBI"
+     terminator      1274..1305
+                     /note="tonB terminator"
+                     /note="bidirectional E. coli tonB-P14 transcription
+                     terminator"
+     promoter        1306..1408
+                     /note="cat promoter"
+                     /note="promoter of the E. coli cat gene"
+     source          1201..1856
+                     /source="Exported"
+ORIGIN
+        1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+       61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+      121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+      181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+      241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+      301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+      361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+      421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+      481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+      541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+      601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+      661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+      721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+      781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+      841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+      901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+      961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+     1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+     1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+     1141 tacgtctcgc cagaaccgtc agatccgcta gagattacgc caaccgccac catgggcagc
+     1201 cgagacgaag acgaattctc tagatatcgc tcaatactga ccatttaaat catacctgac
+     1261 ctccatagca gaaagtcaaa agcctccgac cggaggcttt tgacttgatc ggcacgtaag
+     1321 aggttccaac tttcaccata atgaaataag atcactaccg ggcgtatttt ttgagttatc
+     1381 gagattttca ggagctaagg aagctaaaat gagccatatt caacgggaaa cgtcttgctc
+     1441 gaggccgcga ttaaattcca acatggatgc tgatttatat gggtataaat gggctcgcga
+     1501 taatgtcggg caatcaggtg cgacaatcta tcgattgtat gggaagcccg atgcgccaga
+     1561 gttgtttctg aaacatggca aaggtagcgt tgccaatgat gttacagatg agatggtcag
+     1621 gctaaactgg ctgacggaat ttatgcctct tccgaccatc aagcatttta tccgtactcc
+     1681 tgatgatgca tggttactca ccactgcgat cccagggaaa acagcattcc aggtattaga
+     1741 agaatatcct gattcaggtg aaaatattgt tgatgcgctg gcagtgttcc tgcgccggtt
+     1801 gcattcgatt cctgtttgta attgtccttt taacggcgat cgcgtatttc gtctcg
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p6_Nt-IgKLsequence.gb	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,83 @@
+LOCUS       .                       1886 bp    DNA              UNK 01-JAN-1980
+DEFINITION  .
+ACCESSION   <unknown id>
+VERSION     <unknown id>
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     terminator      392..419
+                     /note="T7Te terminator"
+                     /note="phage T7 early transcription terminator"
+     rep_origin      complement(431..1018)
+                     /direction=LEFT
+                     /note="ori"
+                     /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+                     replication"
+     terminator      1040..1069
+                     /note="T3Te terminator"
+                     /note="phage T3 early transcription terminator"
+     misc_feature    1143..1148
+                     /note="BsmBI"
+     source          5..1149
+                     /source="Exported"
+     misc_feature    1154..1165
+                     /note="Kozak"
+     CDS             1162..1164
+                     /codon_start=1
+                     /note="ATG"
+                     /translation="M"
+     CDS             1165..1224
+                     /codon_start=1
+                     /note="Ig-kappa leader"
+                     /product="leader sequence from mouse immunoglobulin kappa
+                     light chain"
+                     /translation="ETDTLLLWVLLLWVPGSTGD"
+     source          1154..1226
+                     /source="Exported"
+     misc_feature    complement(1232..1237)
+                     /note="BsmBI"
+     terminator      1304..1335
+                     /note="tonB terminator"
+                     /note="bidirectional E. coli tonB-P14 transcription
+                     terminator"
+     promoter        1336..1438
+                     /note="cat promoter"
+                     /note="promoter of the E. coli cat gene"
+     source          1231..1886
+                     /source="Exported"
+ORIGIN
+        1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+       61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+      121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+      181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+      241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+      301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+      361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+      421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+      481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+      541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+      601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+      661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+      721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+      781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+      841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+      901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+      961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+     1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+     1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+     1141 tacgtctcgc cagccgccac catggaaaca gacacactgc tgctatgggt actgctgctc
+     1201 tgggttccag gttccactgg tgacagcagc cgagacgaag acgaattctc tagatatcgc
+     1261 tcaatactga ccatttaaat catacctgac ctccatagca gaaagtcaaa agcctccgac
+     1321 cggaggcttt tgacttgatc ggcacgtaag aggttccaac tttcaccata atgaaataag
+     1381 atcactaccg ggcgtatttt ttgagttatc gagattttca ggagctaagg aagctaaaat
+     1441 gagccatatt caacgggaaa cgtcttgctc gaggccgcga ttaaattcca acatggatgc
+     1501 tgatttatat gggtataaat gggctcgcga taatgtcggg caatcaggtg cgacaatcta
+     1561 tcgattgtat gggaagcccg atgcgccaga gttgtttctg aaacatggca aaggtagcgt
+     1621 tgccaatgat gttacagatg agatggtcag gctaaactgg ctgacggaat ttatgcctct
+     1681 tccgaccatc aagcatttta tccgtactcc tgatgatgca tggttactca ccactgcgat
+     1741 cccagggaaa acagcattcc aggtattaga agaatatcct gattcaggtg aaaatattgt
+     1801 tgatgcgctg gcagtgttcc tgcgccggtt gcattcgatt cctgtttgta attgtccttt
+     1861 taacggcgat cgcgtatttc gtctcg
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p7_L7Ae-Weiss.gb	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,82 @@
+LOCUS       .                       2169 bp    DNA              UNK 01-JAN-1980
+DEFINITION  .
+ACCESSION   <unknown id>
+VERSION     <unknown id>
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     terminator      392..419
+                     /note="T7Te terminator"
+                     /note="phage T7 early transcription terminator"
+     rep_origin      complement(431..1018)
+                     /direction=LEFT
+                     /note="ori"
+                     /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+                     replication"
+     terminator      1040..1069
+                     /note="T3Te terminator"
+                     /note="phage T3 early transcription terminator"
+     misc_feature    1143..1148
+                     /note="BsmBI"
+     source          5..1149
+                     /source="Exported"
+     CDS             1154..1507
+                     /codon_start=1
+                     /note="L7Ae (Weiss)"
+                     /translation="YVRFEVPEDMQNEALSLLEKVRESGKVKKGTNETTKAVERGLAKL
+                     VYIAEDVDPPEIVAHLPLLCEEKNVPYIYVKSKNDLGRAVGIEVPCASAAIINEGELRK
+                     ELGSLVEKIKGLQK"
+     source          1154..1509
+                     /source="Exported"
+     misc_feature    complement(1515..1520)
+                     /note="BsmBI"
+     terminator      1587..1618
+                     /note="tonB terminator"
+                     /note="bidirectional E. coli tonB-P14 transcription
+                     terminator"
+     promoter        1619..1721
+                     /note="cat promoter"
+                     /note="promoter of the E. coli cat gene"
+     source          1514..2169
+                     /source="Exported"
+ORIGIN
+        1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+       61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+      121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+      181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+      241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+      301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+      361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+      421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+      481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+      541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+      601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+      661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+      721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+      781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+      841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+      901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+      961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+     1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+     1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+     1141 tacgtctcgc agctacgtga gatttgaggt tcctgaggac atgcagaacg aagctctgag
+     1201 tctgctggag aaggttaggg agagcggtaa ggtaaagaaa ggtaccaacg aaacgacaaa
+     1261 ggctgtggag aggggactgg caaagctcgt ttacatcgca gaggatgttg acccgcctga
+     1321 gatcgttgct catctgcccc tcctctgcga ggagaagaat gtgccgtaca tttacgttaa
+     1381 aagcaagaac gaccttggaa gggctgtggg cattgaggtg ccatgcgctt cggcagcgat
+     1441 aatcaacgag ggagagctga gaaaggagct tggaagcctt gtggagaaga ttaaaggcct
+     1501 tcagaagtca ggccgagacg aagacgaatt ctctagatat cgctcaatac tgaccattta
+     1561 aatcatacct gacctccata gcagaaagtc aaaagcctcc gaccggaggc ttttgacttg
+     1621 atcggcacgt aagaggttcc aactttcacc ataatgaaat aagatcacta ccgggcgtat
+     1681 tttttgagtt atcgagattt tcaggagcta aggaagctaa aatgagccat attcaacggg
+     1741 aaacgtcttg ctcgaggccg cgattaaatt ccaacatgga tgctgattta tatgggtata
+     1801 aatgggctcg cgataatgtc gggcaatcag gtgcgacaat ctatcgattg tatgggaagc
+     1861 ccgatgcgcc agagttgttt ctgaaacatg gcaaaggtag cgttgccaat gatgttacag
+     1921 atgagatggt caggctaaac tggctgacgg aatttatgcc tcttccgacc atcaagcatt
+     1981 ttatccgtac tcctgatgat gcatggttac tcaccactgc gatcccaggg aaaacagcat
+     2041 tccaggtatt agaagaatat cctgattcag gtgaaaatat tgttgatgcg ctggcagtgt
+     2101 tcctgcgccg gttgcattcg attcctgttt gtaattgtcc ttttaacggc gatcgcgtat
+     2161 ttcgtctcg
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-JSON_arg.json	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,9 @@
+{
+    "JSON_table": "sample",
+    "JSON_sequence_column": "sequence",
+    "JSON_annotation_column": "annotation",
+    "JSON_fragment_column": "fragment",
+    "JSON_db_uri": "postgresql://postgres:RK17@localhost:5432/test_fragments_db",
+    "execution": "true"
+}
+  
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-JSON_arg_block.json	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,9 @@
+{
+    "JSON_table": "sample",
+    "JSON_sequence_column": "sequence",
+    "JSON_annotation_column": "annotation",
+    "JSON_fragment_column": "fragment",
+    "JSON_db_uri": "postgresql://postgres:RK17@localhost:5432/test_fragments_db",
+    "execution": "false"
+}
+  
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_raport.txt	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,5 @@
+p7_L7Ae-Weiss
+p6_Nt-IgKLsequence
+p6_Kozak-ATG
+p4_Kt-L7Ae-Weiss
+HC_Amp_ccdB
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/verification.py	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,20 @@
+from sqlalchemy import create_engine, text  # manual check script: prints the rows returned by the query below
+
+db_uri = "postgresql://postgres:RK17@localhost:5432/test_fragments_db"  # replace with your own database URI; NOTE(review): credentials are hard-coded here
+engine = create_engine(db_uri)
+
+with engine.connect() as conn:  # the connection is scoped to this block
+    result = conn.execute(text("""
+        SELECT fragment, sequence, annotation
+        FROM sample
+        ORDER BY fragment
+    """))  # three columns of every row in "sample", ordered by fragment
+
+    print("Full contents of fragments in DB:\n")
+    for row in result:  # one labelled record per row
+        print(f" Fragment: {row.fragment}")
+        print(" Sequence:")
+        print(row.sequence)
+        print("\n Annotation:")
+        print(row.annotation)
+        print("-" * 80)  # 80-dash separator between records