# HG changeset patch
# User estrain
# Date 1705544550 0
# Node ID 56271dcbc91c5e5f768f9fb0665e72e54ca46e48
Uploaded
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager/data_manager_mlst.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.py	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,108 @@
+import os
+import subprocess
+import shutil
+import json
+import argparse
+import datetime
+import requests
+
+def download_pubmlst_databases():
+    """Copy the PubMLST databases from /mnt/data/mlst/db to 'pubmlst' in the working directory (no network access); exit 1 on failure."""
+    try:
+        subprocess.run(["cp", "-R", "/mnt/data/mlst/db", "pubmlst"], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading databases: {e}")
+        raise SystemExit(1)  # not sys.exit(): this module never imports sys, so that call raised NameError
+
+def make_blast_database(output_directory):
+    """Move ./pubmlst into output_directory and build a BLAST database from its .tfa files.
+
+    FASTA headers get a "<scheme>." prefix; lines containing 'not a locus' are dropped.
+    """
+    mlst_dir = os.path.join(os.getcwd(), "pubmlst")
+    output_directory = os.path.abspath(output_directory)
+    output_mlst_dir = os.path.join(output_directory, "pubmlst")
+    if os.path.exists(output_mlst_dir):
+        shutil.rmtree(output_mlst_dir)
+    shutil.move(mlst_dir, output_mlst_dir)
+
+    blast_dir = os.path.join(output_directory, "blast")
+    os.makedirs(blast_dir, exist_ok=True)
+    blast_file = os.path.join(blast_dir, "mlst.fa")
+    # Open once in 'w' mode so a leftover mlst.fa is replaced, not appended to; sorted() keeps output reproducible.
+    with open(blast_file, 'w') as outfile:
+        for scheme in sorted(os.listdir(output_mlst_dir)):
+            scheme_path = os.path.join(output_mlst_dir, scheme)
+            if not os.path.isdir(scheme_path):
+                continue
+            for file_name in sorted(os.listdir(scheme_path)):
+                if not file_name.endswith('.tfa'):
+                    continue
+                with open(os.path.join(scheme_path, file_name), 'r') as infile:
+                    for line in infile:
+                        if 'not a locus' not in line:
+                            outfile.write(f">{scheme}.{line[1:]}" if line.startswith('>') else line)
+
+    try:
+        subprocess.run(["makeblastdb", "-hash_index", "-in", blast_file, "-dbtype", "nucl", "-title", "PubMLST", "-parse_seqids"], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error creating BLAST database: {e}")
+        raise SystemExit(1)  # not sys.exit(): this module never imports sys, so that call raised NameError
+
+def write_json(version, args_path, args_name, args_out):
+    """Serialise one 'mlst' data-table row to the JSON file at args_out.
+
+    The row holds "value" (version), "name" (args_name) and "path" (args_path),
+    nested as data_tables -> mlst -> [row]. The file is opened in 'w' mode, so
+    any existing content is replaced.
+    """
+    row = {
+        "value": version,
+        "name": args_name,
+        "path": args_path,
+    }
+    payload = {'data_tables': {'mlst': [row]}}
+    # sort_keys makes the emitted key order independent of how the dicts were built.
+    with open(args_out, 'w') as handle:
+        json.dump(payload, handle, indent=2, sort_keys=True)
+
+def main():
+    """Build the MLST data bundle described by the JSON params file passed via --out.
+
+    Reads the params file, fills its extra_files_path with the PubMLST BLAST
+    database and scheme_species_map.tab, then overwrites the same file with the
+    new data-table entry. Exits with status 1 if the species map cannot be fetched.
+    """
+    parser = argparse.ArgumentParser(description='Download and process pubmlst databases')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+
+    output_directory = params['output_data'][0]['extra_files_path']
+    os.makedirs(output_directory, exist_ok=True)
+
+    download_pubmlst_databases()
+    make_blast_database(output_directory)
+
+    url = 'https://raw.githubusercontent.com/tseemann/mlst/master/db/scheme_species_map.tab'
+    stab = "scheme_species_map.tab"
+    # A timeout keeps the job from hanging forever on a stalled connection.
+    response = requests.get(url, timeout=60)
+    if response.status_code != 200:
+        # Abort here: carrying on would crash in shutil.copy or copy a stale local file.
+        print("Failed to retrieve the file")
+        raise SystemExit(1)
+    with open(stab, 'w') as file:
+        file.write(response.text)
+    print("File downloaded successfully")
+    shutil.copy(stab, output_directory)
+
+    datetime_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    tablename = f"mlst_database_{datetime_str}"
+    write_json(tablename, output_directory, tablename, args.out[0])
+
+if __name__ == "__main__":  # entry point: run main() only when executed as a script, not when imported
+    main()
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager/data_manager_mlst.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.xml	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,30 @@
+
+    
+        blast
+        mlst
+    
+    
+    
+    
+    
+        
+    
+    
+        
+        
+    
+    
+    
+    
+      
+       @UNPUBLISHED{Seemann2016,
+         author = "Seemann T",
+         title = "MLST: Scan contig files against PubMLST typing schemes",
+         year = "2016",
+         url = {https://github.com/tseemann/mlst}
+       }
+      
+    
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager_conf.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager_conf.xml	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,19 @@
+
+
+    
+      
+        
+      
+    
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/test-data/mlst.loc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/test-data/mlst.loc	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+# this is a tab separated file describing the location of mlst databases
+#
+# the columns are:
+# value name path
+#
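+# for example: mlst_database_20240118_022230	mlst_database_20240118_022230	/path/to/mlst_database_20240118_022230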
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool-data/mlst.loc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool-data/mlst.loc	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+# this is a tab separated file describing the location of mlst databases
+#
+# the columns are:
+# value name path
+#
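+# for example: mlst_database_20240118_022230	mlst_database_20240118_022230	/path/to/mlst_database_20240118_022230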
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool_data_table_conf.xml.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.sample	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+
+    
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool_data_table_conf.xml.test
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.test	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+
+    
+