# HG changeset patch
# User estrain
# Date 1705544550 0
# Node ID 56271dcbc91c5e5f768f9fb0665e72e54ca46e48
Uploaded
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager/data_manager_mlst.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.py Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,108 @@
+import argparse
+import datetime
+import json
+import os
+import shutil
+import subprocess
+import sys
+
+import requests
+
+def download_pubmlst_databases(source_dir="/mnt/data/mlst/db", dest_dir="pubmlst"):
+    """Copy the MLST database tree from a local directory.
+
+    Despite the name, nothing is fetched over the network: ``source_dir`` is
+    copied recursively to ``dest_dir`` by running ``cp -R``.
+
+    Args:
+        source_dir: Directory to copy. Defaults to ``/mnt/data/mlst/db``.
+        dest_dir: Target of the copy. Defaults to ``pubmlst``, which is
+            resolved against the current working directory.
+
+    Raises:
+        SystemExit: With exit code 1 when ``cp`` exits with a non-zero status.
+    """
+    try:
+        subprocess.run(["cp", "-R", source_dir, dest_dir], check=True)
+    except subprocess.CalledProcessError as e:
+        # Report on stderr and stop with a non-zero status so the caller
+        # sees the job as failed.
+        print(f"Error downloading databases: {e}", file=sys.stderr)
+        sys.exit(1)
+
+def _write_scheme_fasta(mlst_dir, fasta_path):
+    """Concatenate every ``*.tfa`` file under ``mlst_dir`` into ``fasta_path``.
+
+    Each immediate sub-directory of ``mlst_dir`` is treated as one scheme.
+    Header lines (starting with ``>``) are rewritten to ``>{scheme}.{rest}``;
+    any line containing ``not a locus`` is dropped; all other lines are copied
+    unchanged. ``fasta_path`` is truncated first, so repeated runs do not
+    accumulate duplicate records.
+    """
+    with open(fasta_path, "w") as outfile:
+        # Sorted traversal keeps the output identical from run to run.
+        for scheme in sorted(os.listdir(mlst_dir)):
+            scheme_path = os.path.join(mlst_dir, scheme)
+            if not os.path.isdir(scheme_path):
+                continue
+            for file_name in sorted(os.listdir(scheme_path)):
+                if not file_name.endswith(".tfa"):
+                    continue
+                with open(os.path.join(scheme_path, file_name), "r") as infile:
+                    for line in infile:
+                        if "not a locus" in line:
+                            continue
+                        if line.startswith(">"):
+                            outfile.write(f">{scheme}.{line[1:]}")
+                        else:
+                            outfile.write(line)
+
+
+def make_blast_database(output_directory):
+    """Move ``./pubmlst`` into ``output_directory`` and build a BLAST database.
+
+    Steps:
+      1. Move ``pubmlst`` from the current working directory to
+         ``<output_directory>/pubmlst``, replacing any directory already there.
+      2. Write all scheme alleles into ``<output_directory>/blast/mlst.fa``.
+      3. Run ``makeblastdb`` on that FASTA file as a nucleotide database
+         titled ``PubMLST``.
+
+    Args:
+        output_directory: Destination directory; converted to an absolute path.
+
+    Raises:
+        SystemExit: With exit code 1 when ``makeblastdb`` exits non-zero.
+    """
+    mlst_dir = os.path.join(os.getcwd(), "pubmlst")
+    output_directory = os.path.abspath(output_directory)
+    output_mlst_dir = os.path.join(output_directory, "pubmlst")
+
+    # Remove a previous copy so the move does not nest inside it.
+    if os.path.exists(output_mlst_dir):
+        shutil.rmtree(output_mlst_dir)
+    shutil.move(mlst_dir, output_mlst_dir)
+
+    blast_dir = os.path.join(output_directory, "blast")
+    os.makedirs(blast_dir, exist_ok=True)
+    blast_file = os.path.join(blast_dir, "mlst.fa")
+
+    _write_scheme_fasta(output_mlst_dir, blast_file)
+
+    try:
+        subprocess.run(
+            [
+                "makeblastdb",
+                "-hash_index",
+                "-in", blast_file,
+                "-dbtype", "nucl",
+                "-title", "PubMLST",
+                "-parse_seqids",
+            ],
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error creating BLAST database: {e}", file=sys.stderr)
+        sys.exit(1)
+
+def write_json(version, args_path, args_name, args_out):
+    """Serialize a single ``mlst`` data table row to ``args_out`` as JSON.
+
+    Args:
+        version: Stored under the row's ``value`` key.
+        args_path: Stored under the row's ``path`` key.
+        args_name: Stored under the row's ``name`` key.
+        args_out: Path of the file to (over)write.
+    """
+    row = {"value": version, "name": args_name, "path": args_path}
+    payload = {"data_tables": {"mlst": [row]}}
+
+    serialized = json.dumps(payload, indent=2, sort_keys=True)
+    with open(args_out, "w") as handle:
+        handle.write(serialized)
+
+def main():
+    """Build the MLST data bundle and write its data table entry.
+
+    The file given by ``--out`` is first read as JSON to find the output
+    directory (``output_data[0].extra_files_path``). After the database has
+    been copied, the BLAST database built and ``scheme_species_map.tab``
+    fetched into that directory, the same ``--out`` file is overwritten with
+    the data table JSON.
+
+    Raises:
+        SystemExit: With exit code 1 when ``scheme_species_map.tab`` cannot
+            be retrieved, or when one of the build steps exits.
+    """
+    parser = argparse.ArgumentParser(description='Download and process pubmlst databases')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+    args = parser.parse_args()
+    out_path = args.out[0]  # nargs=1 makes args.out a one-element list
+
+    with open(out_path) as fh:
+        params = json.load(fh)
+
+    output_directory = params['output_data'][0]['extra_files_path']
+    os.makedirs(output_directory, exist_ok=True)
+
+    download_pubmlst_databases()
+    make_blast_database(output_directory)
+
+    url = 'https://raw.githubusercontent.com/tseemann/mlst/master/db/scheme_species_map.tab'
+
+    # A timeout keeps the job from hanging forever on a stalled connection.
+    try:
+        response = requests.get(url, timeout=60)
+    except requests.RequestException as e:
+        print(f"Failed to retrieve the file: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Stop here on a bad status instead of failing later on a missing file.
+    if response.status_code != 200:
+        print("Failed to retrieve the file", file=sys.stderr)
+        sys.exit(1)
+
+    # Write the raw bytes so the file is stored exactly as served.
+    scheme_map_path = os.path.join(output_directory, "scheme_species_map.tab")
+    with open(scheme_map_path, 'wb') as fh:
+        fh.write(response.content)
+    print("File downloaded successfully")
+
+    datetime_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    tablename = f"mlst_database_{datetime_str}"
+
+    write_json(tablename, output_directory, tablename, out_path)
+
+if __name__ == "__main__":
+ main()
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager/data_manager_mlst.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.xml Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,30 @@
+
+
+ blast
+ mlst
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @UNPUBLISHED{Seemann2016,
+ author = "Seemann T",
+ title = "MLST: Scan contig files against PubMLST typing schemes",
+ year = "2016",
+ url = {https://github.com/tseemann/mlst}
+ }
+
+
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager_conf.xml Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/test-data/mlst.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/test-data/mlst.loc Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
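+# This is a tab-separated file describing the location of MLST databases.
+#
+# The columns are:
+# value name path
+#
+# For example (<tab> stands for a single tab character):
+# mlst_database_20240118_022230<tab>mlst_database_20240118_022230<tab>/path/to/mlst_database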
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool-data/mlst.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool-data/mlst.loc Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
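+# This is a tab-separated file describing the location of MLST databases.
+#
+# The columns are:
+# value name path
+#
+# For example (<tab> stands for a single tab character):
+# mlst_database_20240118_022230<tab>mlst_database_20240118_022230<tab>/path/to/mlst_database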
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.sample Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+
+
+
diff -r 000000000000 -r 56271dcbc91c data_manager_mlst/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.test Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+
+
+