changeset 0:56271dcbc91c draft

Uploaded
author estrain
date Thu, 18 Jan 2024 02:22:30 +0000
parents
children a2da81d3378b
files data_manager_mlst/data_manager/data_manager_mlst.py data_manager_mlst/data_manager/data_manager_mlst.xml data_manager_mlst/data_manager_conf.xml data_manager_mlst/test-data/mlst.loc data_manager_mlst/tool-data/mlst.loc data_manager_mlst/tool_data_table_conf.xml.sample data_manager_mlst/tool_data_table_conf.xml.test
diffstat 7 files changed, 181 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.py	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,108 @@
+import argparse
+import datetime
+import json
+import os
+import shutil
+import subprocess
+import sys

+import requests
+
+def download_pubmlst_databases():
+    """Download databases from pubmlst."""
+    try:
+        subprocess.run(["cp", "-R", "/mnt/data/mlst/db","pubmlst"], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading databases: {e}")
+        sys.exit(1)
+
+def make_blast_database(output_directory):
+    """Create a BLAST database from downloaded data."""
+    #dir_path = os.path.dirname(os.path.realpath(__file__))
+    dir_path = os.getcwd()
+    mlst_dir = os.path.join(dir_path, "pubmlst")
+    output_directory = os.path.abspath(output_directory)
+    output_mlst_dir = os.path.join(output_directory, "pubmlst")
+
+    if os.path.exists(output_mlst_dir):
+        shutil.rmtree(output_mlst_dir)
+    shutil.move(mlst_dir, output_mlst_dir)
+
+    blast_dir = os.path.join(output_directory, "blast")
+    os.makedirs(blast_dir, exist_ok=True)
+
+    blast_file = os.path.join(blast_dir, "mlst.fa")
+    for scheme_dir in [d for d in os.listdir(output_mlst_dir) if os.path.isdir(os.path.join(output_mlst_dir, d))]:
+        scheme = os.path.basename(scheme_dir)
+        with open(blast_file, 'a') as outfile:
+            for file_name in os.listdir(os.path.join(output_mlst_dir, scheme_dir)):
+                if file_name.endswith('.tfa'):
+                    with open(os.path.join(output_mlst_dir, scheme_dir, file_name), 'r') as infile:
+                        for line in infile:
+                            if 'not a locus' not in line:
+                                if line.startswith('>'):
+                                    outfile.write(f">{scheme}.{line[1:]}")
+                                else:
+                                    outfile.write(line)
+
+    try:
+        subprocess.run(["makeblastdb", "-hash_index", "-in", blast_file, "-dbtype", "nucl", "-title", "PubMLST", "-parse_seqids"], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error creating BLAST database: {e}")
+        sys.exit(1)
+
+def write_json(version, args_path, args_name, args_out):
+    """Write data table entry to JSON file."""
+    data_table_entry = {
+        'data_tables': {
+            'mlst': [
+                {
+                    "value": version,
+                    "name": args_name,
+                    "path": args_path,
+                }
+            ]
+        }
+    }
+
+    with open(args_out, 'w') as fh:
+        json.dump(data_table_entry, fh, indent=2, sort_keys=True)
+
+def main():
+    parser = argparse.ArgumentParser(description='Download and process pubmlst databases')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+
+    output_directory = params['output_data'][0]['extra_files_path']
+    if not os.path.exists(output_directory):
+        os.makedirs(output_directory)
+
+    download_pubmlst_databases()
+    make_blast_database(output_directory)
+
+    
+    url = 'https://raw.githubusercontent.com/tseemann/mlst/master/db/scheme_species_map.tab'
+
+    # Send a GET request to the URL
+    response = requests.get(url)
+
+    # Check if the request was successful
+    if response.status_code == 200:
+      with open('scheme_species_map.tab', 'w') as file:
+        file.write(response.text)
+      print("File downloaded successfully")
+    else:
+      print("Failed to retrieve the file")
+
+    stab = "scheme_species_map.tab"
+    shutil.copy(stab,output_directory) 
+ 
+    datetime_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    tablename = f"mlst_database_{datetime_str}"
+
+    write_json(tablename, output_directory, tablename, args.out[0])
+
+# Run the data manager only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager/data_manager_mlst.xml	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,30 @@
+<tool id="data_manager_mlst" name="mlst Data Manager" tool_type="manage_data" version="0.0.1" profile="20.01">
+    <!-- Galaxy data manager tool that runs data_manager_mlst.py to build an mlst database bundle. -->
+    <!-- NOTE(review): data_manager_mlst.py imports `requests`, which is not listed below; confirm it is available in the resolved environment. -->
+    <requirements>
+        <requirement type="package">blast</requirement>
+        <requirement type="package">mlst</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/data_manager_mlst.py' --out '${output_file}'
+    ]]></command>
+    <inputs>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+    </help>
+    <citations>
+      <citation type="bibtex">
+       @UNPUBLISHED{Seemann2016,
+         author = "Seemann T",
+         title = "MLST: Scan contig files against PubMLST typing schemes",
+         year = "2016",
+         url = {https://github.com/tseemann/mlst}
+       }
+      </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/data_manager_conf.xml	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!-- Connects the data_manager_mlst tool to the "mlst" data table. -->
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_mlst.xml" id="data_manager_mlst">
+      <data_table name="mlst">
+        <output>
+          <column name="value" />
+          <column name="name" />
+          <column name="path" output_ref="output_file" >
+            <!-- Move the directory named by the path column to mlst/${value} under GALAXY_DATA_MANAGER_DATA_PATH. -->
+            <move type="directory" relativize_symlinks="True">
+              <src>${path}</src>
+              <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">mlst/${value}</target>
+            </move>
+            <!-- Rewrite the path column to the moved location, then make it absolute. -->
+            <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/mlst/${value}</value_translation>
+            <value_translation type="function">abspath</value_translation>
+          </column>
+        </output>
+      </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/test-data/mlst.loc	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+# this is a tab separated file describing the location of mlst databases
+#
+# the columns are:
+# value name path
+#
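+# for example (columns separated by tabs):
+#mlst_database_20240118_022230	mlst_database_20240118_022230	/path/to/mlst/mlst_database_20240118_022230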
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool-data/mlst.loc	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+# this is a tab separated file describing the location of mlst databases
+#
+# the columns are:
+# value name path
+#
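+# for example (columns separated by tabs):
+#mlst_database_20240118_022230	mlst_database_20240118_022230	/path/to/mlst/mlst_database_20240118_022230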
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.sample	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <!-- "mlst" data table: columns value, name and path, read from tool-data/mlst.loc; "#" is the comment character. -->
+    <table name="mlst" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/mlst.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_mlst/tool_data_table_conf.xml.test	Thu Jan 18 02:22:30 2024 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <!-- Test variant of the "mlst" data table: same columns, read from test-data/mlst.loc relative to ${__HERE__}. -->
+    <table name="mlst" comment_char="#" allow_duplicate_entries="False">
+        <columns>value,name, path</columns>
+        <file path="${__HERE__}/test-data/mlst.loc" />
+    </table>
+</tables>