# HG changeset patch
# User iuc
# Date 1494860923 14400
# Node ID f8608fddfb230bbba57a53d79ef26bf38cc0b784
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_qiime_database_downloader commit 4934eb34300b5fa54d62d8b67e5b6e989e963ac9
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_qiime_download.py Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,378 @@
+#!/usr/bin/env python
+# Data manager for reference data for the QIIME Galaxy tools
+
+import argparse
+import ftplib
+import json
+import os
+import tarfile
+import zipfile
+
+import requests
+
+
+protocol = {
+ "unite": "http",
+ "greengenes": "ftp",
+ "silva": "http",
+ "img": "ftp"
+}
+baseUrl = {
+ "unite": "http://unite.ut.ee/sh_files/sh_qiime_release_",
+ "greengenes": "greengenes.microbio.me",
+ "silva": "http://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_",
+ "img": "ftp.microbio.me"
+}
+ftp_dir = {
+ "greengenes": "/greengenes_release/gg_",
+ "img": ""
+}
+ftp_file_prefix = {
+ "greengenes": "gg_",
+ "img": ""
+}
+ftp_file_suffix = {
+ "greengenes": "_otus",
+ "img": ""
+}
+extension = {
+ "unite": "zip",
+ "greengenes": "tar.gz",
+ "silva": {
+ "104_release": "tgz",
+ "108_release": "tgz",
+ "108_release_curated": "tgz",
+ "111_release": "tgz",
+ "119_consensus_majority_taxonomy": "zip",
+ "119_release": "zip",
+ "119_release_aligned_rep_files": "tar.gz",
+ "123_release": "zip",
+ "128_release": "tgz"},
+ "img": "tgz"
+}
+filetypes = ["rep_set", "rep_set_aligned", "taxonomy", "trees"]
+
+
+# Utility functions for interacting with Galaxy JSON
+def read_input_json(jsonfile):
+ """Read the JSON supplied from the data manager tool
+
+ Returns a tuple (param_dict,extra_files_path)
+
+ 'param_dict' is an arbitrary dictionary of parameters
+ input into the tool; 'extra_files_path' is the path
+ to a directory where output files must be put for the
+ receiving data manager to pick them up.
+
+ NB the directory pointed to by 'extra_files_path'
+    doesn't exist initially; it is the job of the script
+ to create it if necessary.
+
+ """
+    with open(jsonfile) as fh:
+        params = json.load(fh)
+ return (params['param_dict'],
+ params['output_data'][0]['extra_files_path'])
+
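+# The JSON handed over by Galaxy has (at least) the shape sketched
+# below (values hypothetical; this script only uses
+# 'extra_files_path' from it):
+#
+# {
+#     "param_dict": { ... tool parameters ... },
+#     "output_data": [
+#         {"extra_files_path": "/path/to/dataset_extra_files"}
+#     ]
+# }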
+
+# Utility functions for creating data table dictionaries
+#
+# Example usage:
+# >>> d = create_data_tables_dict()
+# >>> add_data_table(d,'my_data')
+# >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
+# >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
+# >>> print(json.dumps(d))
+def create_data_tables_dict():
+ """Return a dictionary for storing data table information
+
+ Returns a dictionary that can be used with 'add_data_table'
+ and 'add_data_table_entry' to store information about a
+ data table. It can be converted to JSON to be sent back to
+ the data manager.
+
+ """
+ d = {}
+ d['data_tables'] = {}
+ return d
+
+
+def add_data_table(d, table):
+ """Add a data table to the data tables dictionary
+
+ Creates a placeholder for a data table called 'table'.
+
+ """
+ d['data_tables'][table] = []
+
+
+def add_data_table_entry(d, table, entry):
+ """Add an entry to a data table
+
+ Appends an entry to the data table 'table'. 'entry'
+ should be a dictionary where the keys are the names of
+ columns in the data table.
+
+ Raises an exception if the named data table doesn't
+ exist.
+
+ """
+ try:
+ d['data_tables'][table].append(entry)
+ except KeyError:
+ raise Exception("add_data_table_entry: no table '%s'" % table)
+
+
+def get_ftp_file(ftp, filename):
+ """
+ """
+ try:
+ ftp.retrbinary("RETR " + filename, open(filename, 'wb').write)
+ except:
+ print("Error")
+
+
+def download_archive(db, version, ext):
+ """
+
+ """
+ filepath = "%s_%s.%s" % (db, version, ext)
+ if protocol[db] == "http":
+ url = "%s%s.%s" % (baseUrl[db], version, ext)
+ r = requests.get(url, stream=True)
+ r.raise_for_status()
+ with open(filepath, "wb") as fd:
+ for chunk in r.iter_content(chunk_size=128):
+ fd.write(chunk)
+ elif protocol[db] == "ftp":
+ ftp = ftplib.FTP(baseUrl[db])
+ ftp.login("anonymous", "ftplib-example-1")
+ if db == "greengenes" and version == "13_8":
+ ftp.cwd("%s%s" % (ftp_dir[db], "13_5"))
+ else:
+ ftp.cwd("%s%s" % (ftp_dir[db], version))
+ filepath = "%s%s%s.%s" % (
+ ftp_file_prefix[db],
+ version,
+ ftp_file_suffix[db],
+ ext)
+ get_ftp_file(ftp, filepath)
+ ftp.quit()
+ return filepath
+
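+# Worked example of the FTP branch above: download_archive("greengenes",
+# "13_8", "tar.gz") logs in anonymously to greengenes.microbio.me,
+# changes into /greengenes_release/gg_13_5 (the 13_8 OTUs are published
+# in the 13_5 release directory) and retrieves gg_13_8_otus.tar.gz into
+# the working directory.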
+
+def find_archive_content_path(archive_content_path):
+ """
+ """
+ content = os.listdir(archive_content_path)
+ archive_content = []
+ for x in content:
+ if not x.startswith(".") and not x.startswith("_"):
+ archive_content.append(x)
+ if len(archive_content) == 1:
+ archive_content_path = os.path.join(
+ archive_content_path,
+ archive_content[0])
+ return archive_content_path
+
+
+def extract_archive(filepath, ext, db):
+ """
+ """
+ archive_content_path = "tmp"
+ if ext == "tar.gz" or ext == "tgz":
+ tar = tarfile.open(filepath)
+ tar.extractall(path=archive_content_path)
+ tar.close()
+ archive_content_path = find_archive_content_path(archive_content_path)
+ elif ext == "zip":
+ zip_ref = zipfile.ZipFile(filepath, 'r')
+ zip_ref.extractall(archive_content_path)
+ zip_ref.close()
+ archive_content_path = find_archive_content_path(archive_content_path)
+ return archive_content_path
+
+
+def move_unite_files(archive_content_path, filename_prefix, name_prefix, data_tables, target_dir):
+ """
+
+ """
+ archive_content = os.listdir(archive_content_path)
+ for content in archive_content:
+ content_filepath = os.path.join(archive_content_path, content)
+ content_name_prefix = "%s - %s" % (name_prefix, content.split(".")[0])
+ content_filename_prefix = "%s_%s" % (filename_prefix, content)
+ if content.find("refs") != -1:
+ move_file(
+ content_filepath,
+ content_filename_prefix,
+ content_name_prefix,
+ data_tables,
+ os.path.join(target_dir, "rep_set"),
+ "rep_set")
+ elif content.find("taxonomy") != -1:
+ move_file(
+ content_filepath,
+ content_filename_prefix,
+ content_name_prefix,
+ data_tables,
+ os.path.join(target_dir, "taxonomy"),
+ "taxonomy")
+
+
+def move_file(input_filepath, filename, name, data_tables, target_dir, filetype):
+ """
+ """
+ output_filepath = os.path.join(target_dir, filename)
+ os.rename(input_filepath, output_filepath)
+ add_data_table_entry(
+ data_tables,
+ "qiime_%s" % (filetype),
+ dict(
+ dbkey=filename,
+ value="1.0",
+ name=name,
+ path=output_filepath))
+
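+# Example (hypothetical names and paths): move_file("tmp/97_otus.fasta",
+# "greengenes_13_8_97_otus.fasta", "greengenes (13_8) - 97_otus",
+# data_tables, "/path/to/rep_set", "rep_set") appends the entry
+#   {"dbkey": "greengenes_13_8_97_otus.fasta", "value": "1.0",
+#    "name": "greengenes (13_8) - 97_otus",
+#    "path": "/path/to/rep_set/greengenes_13_8_97_otus.fasta"}
+# to the "qiime_rep_set" data table.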
+
+def move_dir_content(input_path, filename_prefix, name_prefix, data_tables, target_dir, filetype):
+ """
+ """
+ for content in os.listdir(input_path):
+ if content.startswith("."):
+ continue
+ content_path = os.path.join(input_path, content)
+ content_name_prefix = "%s - %s" % (name_prefix, content.split(".")[0])
+ content_filename_prefix = "%s_%s" % (filename_prefix, content)
+ if os.path.isdir(content_path):
+ move_dir_content(
+ content_path,
+ content_filename_prefix,
+ content_name_prefix,
+ data_tables,
+ target_dir,
+ filetype)
+ else:
+ move_file(
+ content_path,
+ content_filename_prefix,
+ content_name_prefix,
+ data_tables,
+ target_dir,
+ filetype)
+
+
+def move_files(archive_content_path, filename_prefix, name_prefix, data_tables, target_dir, db, version):
+ """
+ """
+ for filetype in filetypes:
+ if filetype == "rep_set_aligned":
+ if db == "greengenes" and version == "12_10":
+ continue
+ filetype_target_dir = os.path.join(
+ target_dir,
+ filetype)
+ filetype_path = os.path.join(
+ archive_content_path,
+ filetype)
+ move_dir_content(
+ filetype_path,
+ filename_prefix,
+ name_prefix,
+ data_tables,
+ filetype_target_dir,
+ filetype)
+
+
+def download_db(data_tables, db, version, target_dir):
+ """Download QIIME database
+
+ Creates references to the specified file(s) on the Galaxy
+ server in the appropriate data table (determined from the
+ file extension).
+
+ The 'data_tables' dictionary should have been created using
+ the 'create_data_tables_dict' and 'add_data_table' functions.
+
+ Arguments:
+ data_tables: a dictionary containing the data table info
+ db: name of the database
+ version: version of the database
+ table_name: name of the table
+ target_dir: directory to put copy or link to the data file
+
+ """
+ ext = extension[db]
+ if db == "silva":
+ ext = ext[version]
+
+ print("Download archive")
+ filepath = download_archive(db, version, ext)
+
+ print("Extract archive %s" % filepath)
+ archive_content_path = extract_archive(filepath, ext, db)
+
+ print("Moving file from %s" % archive_content_path)
+ filename_prefix = "%s_%s" % (db, version)
+ name_prefix = "%s (%s)" % (db, version)
+ if db == "greengenes" or db == "silva":
+ move_files(
+ archive_content_path,
+ filename_prefix,
+ name_prefix,
+ data_tables,
+ target_dir,
+ db,
+ version)
+ elif db == "unite":
+ move_unite_files(
+ archive_content_path,
+ filename_prefix,
+ name_prefix,
+ data_tables,
+ target_dir)
+
+
+if __name__ == "__main__":
+ print("Starting...")
+
+ # Read command line
+ parser = argparse.ArgumentParser(
+ description='Download QIIME reference database')
+ parser.add_argument('--database', help="Database name")
+ parser.add_argument('--version', help="Database version")
+ parser.add_argument('--jsonfile', help="Output JSON file")
+ args = parser.parse_args()
+
+ jsonfile = args.jsonfile
+
+ # Read the input JSON
+ params, target_dir = read_input_json(jsonfile)
+
+ # Make the target directory
+ print("Making %s" % target_dir)
+ os.mkdir(target_dir)
+ os.mkdir(os.path.join(target_dir, "rep_set"))
+ os.mkdir(os.path.join(target_dir, "rep_set_aligned"))
+ os.mkdir(os.path.join(target_dir, "taxonomy"))
+ os.mkdir(os.path.join(target_dir, "trees"))
+
+ # Set up data tables dictionary
+ data_tables = create_data_tables_dict()
+ add_data_table(data_tables, "qiime_rep_set")
+ add_data_table(data_tables, "qiime_rep_set_aligned")
+ add_data_table(data_tables, "qiime_taxonomy")
+ add_data_table(data_tables, "qiime_trees")
+
+ # Fetch data from specified data sources
+ download_db(
+ data_tables,
+ args.database,
+ args.version,
+ target_dir)
+
+ # Write output JSON
+ print("Outputting JSON")
+    print(json.dumps(data_tables))
+ with open(jsonfile, 'w') as out:
+ json.dump(data_tables, out)
+ print("Done.")
diff -r 000000000000 -r f8608fddfb23 data_manager/data_manager_qiime_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_qiime_download.xml Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,95 @@
+<!-- Galaxy tool wrapper XML for this data manager. Most of the markup
+     was not preserved in this extract; the surviving fragments are the
+     tool description ("reference databases"), a "requests" package
+     requirement, and the citation doi:10.1038/nmeth.f.303. -->
diff -r 000000000000 -r f8608fddfb23 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,65 @@
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_rep_set.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#
+#
+#For each reference database, you need to download the representative sequence FASTA file into the QIIME reference data path
+#
+#A list of useful databases for QIIME: http://qiime.org/home_static/dataFiles.html
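+#
+#The loc file has four tab-separated columns: value, name, dbkey, path
+#An illustrative entry (hypothetical path; columns separated by tabs):
+#1.0	greengenes (13_8) - 97_otus	greengenes_13_8_97_otus.fasta	/path/to/rep_set/greengenes_13_8_97_otus.fasta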
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_rep_set_aligned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_rep_set_aligned.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#
+#
+#For each reference database, you need to download the aligned representative sequence FASTA file into the QIIME reference data path
+#
+#A list of useful databases for QIIME: http://qiime.org/home_static/dataFiles.html
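+#
+#The loc file has four tab-separated columns: value, name, dbkey, path
+#An illustrative entry (hypothetical path; columns separated by tabs):
+#1.0	greengenes (13_8) - 97_otus	greengenes_13_8_97_otus.fasta	/path/to/rep_set_aligned/greengenes_13_8_97_otus.fasta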
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_taxonomy.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_taxonomy.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#
+#
+#For each reference database, you need to download the taxonomy mapping file into the QIIME reference data path
+#
+#A list of useful databases for QIIME: http://qiime.org/home_static/dataFiles.html
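+#
+#The loc file has four tab-separated columns: value, name, dbkey, path
+#An illustrative entry (hypothetical path; columns separated by tabs):
+#1.0	greengenes (13_8) - 97_otu_taxonomy	greengenes_13_8_97_otu_taxonomy.txt	/path/to/taxonomy/greengenes_13_8_97_otu_taxonomy.txt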
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool-data/qiime_trees.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/qiime_trees.loc.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,6 @@
+#
+#
+#For each reference database, you need to download the tree file into the QIIME reference data path
+#
+#A list of useful databases for QIIME: http://qiime.org/home_static/dataFiles.html
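+#
+#The loc file has four tab-separated columns: value, name, dbkey, path
+#An illustrative entry (hypothetical path; columns separated by tabs):
+#1.0	greengenes (13_8) - 97_otus	greengenes_13_8_97_otus.tree	/path/to/trees/greengenes_13_8_97_otus.tree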
+#
\ No newline at end of file
diff -r 000000000000 -r f8608fddfb23 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon May 15 11:08:43 2017 -0400
@@ -0,0 +1,18 @@
+<tables>
+    <table name="qiime_rep_set" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_rep_set.loc" />
+    </table>
+    <table name="qiime_rep_set_aligned" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_rep_set_aligned.loc" />
+    </table>
+    <table name="qiime_taxonomy" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_taxonomy.loc" />
+    </table>
+    <table name="qiime_trees" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/qiime_trees.loc" />
+    </table>
+</tables>
\ No newline at end of file