Mercurial > repos > bgruening > data_manager_diamond_database_builder
annotate data_manager/data_manager_diamond_database_builder.py @ 1:5a0d0bee4f8d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
author | bgruening |
---|---|
date | Tue, 03 Dec 2019 17:39:48 -0500 |
parents | ce62d0912b10 |
children | 5558f74bd296 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
2 import json |
0 | 3 import sys |
4 import os | |
5 import tempfile | |
6 import shutil | |
7 import optparse | |
8 import urllib2 | |
9 import subprocess | |
10 from ftplib import FTP | |
11 import tarfile | |
12 import zipfile | |
13 import gzip | |
14 import bz2 | |
15 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
# Number of bytes requested per read() call when copying streams (2**20 bytes = 1 MiB).
CHUNK_SIZE = 2 ** 20
0 | 17 |
18 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def cleanup_before_exit(tmp_dir):
    """Delete the temporary working directory tree, if there is one.

    :param tmp_dir: path of the directory to remove. A falsy value
        (``None``, ``''``) or a path that does not exist is ignored.
    """
    if tmp_dir and os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
0 | 22 |
23 | |
def stop_err(msg):
    """Write ``msg`` to standard error and terminate with exit status 1.

    :param msg: text written verbatim to ``sys.stderr`` (no newline is added).
    :raises SystemExit: always, with code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
27 | |
28 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
29 def _get_files_in_ftp_path(ftp, path): |
0 | 30 path_contents = [] |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
31 ftp.retrlines('MLSD %s' % (path), path_contents.append) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
32 return [line.split(';')[-1].lstrip() for line in path_contents] |
0 | 33 |
34 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
35 def _get_stream_readers_for_tar(file_obj, tmp_dir): |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
36 fasta_tar = tarfile.open(fileobj=file_obj, mode='r:*') |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
37 return [fasta_tar.extractfile(member) for member in fasta_tar.getmembers()] |
0 | 38 |
39 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
40 def _get_stream_readers_for_zip(file_obj, tmp_dir): |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
41 fasta_zip = zipfile.ZipFile(file_obj, 'r') |
0 | 42 rval = [] |
43 for member in fasta_zip.namelist(): | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
44 fasta_zip.extract(member, tmp_dir) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
45 rval.append(open(os.path.join(tmp_dir, member), 'rb')) |
0 | 46 return rval |
47 | |
48 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
49 def _get_stream_readers_for_gzip(file_obj, tmp_dir): |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
50 return [gzip.GzipFile(fileobj=file_obj, mode='rb')] |
0 | 51 |
52 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
53 def _get_stream_readers_for_bz2(file_obj, tmp_dir): |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
54 return [bz2.BZ2File(file_obj.name, 'rb')] |
0 | 55 |
56 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def download_from_ncbi(data_manager_dict, params, target_directory, database_id, database_name):
    """Download a FASTA archive from the NCBI FTP server and build a database from it.

    The requested identifier is looked up in ``/blast/db/FASTA/`` with each of
    the known compressed extensions; the first match is downloaded into a
    temporary directory, unpacked through the matching reader factory and
    handed to ``_stream_fasta_to_file``. The resulting entry is appended to
    ``data_manager_dict``.

    :param data_manager_dict: dict updated in place with the new table entry.
    :param params: dict; ``params['param_dict']['reference_source']
        ['requested_identifier']`` names the NCBI file (without extension).
    :param target_directory: directory the database is written to.
    :param database_id: value used for the entry and the output file name.
    :param database_name: display name stored in the entry.
    :raises Exception: if no file matching the identifier is listed on the server.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    NCBI_DOWNLOAD_PATH = '/blast/db/FASTA/'
    # Order matters: compound extensions are tried before their plain suffixes.
    COMPRESSED_EXTENSIONS = [
        ('.tar.gz', _get_stream_readers_for_tar),
        ('.tar.bz2', _get_stream_readers_for_tar),
        ('.zip', _get_stream_readers_for_zip),
        ('.gz', _get_stream_readers_for_gzip),
        ('.bz2', _get_stream_readers_for_bz2),
    ]

    ncbi_identifier = params['param_dict']['reference_source']['requested_identifier']
    ftp = FTP(NCBI_FTP_SERVER)
    tmp_dir = None
    try:
        ftp.login()

        path_contents = _get_files_in_ftp_path(ftp, NCBI_DOWNLOAD_PATH)

        ncbi_file_name = None
        get_stream_reader = None
        ext = None
        for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
            if "%s%s" % (ncbi_identifier, ext) in path_contents:
                ncbi_file_name = "%s%s%s" % (NCBI_DOWNLOAD_PATH, ncbi_identifier, ext)
                break

        if not ncbi_file_name:
            raise Exception('Unable to determine filename for NCBI database for %s: %s' % (ncbi_identifier, path_contents))

        tmp_dir = tempfile.mkdtemp(prefix='tmp-data-manager-ncbi-')
        ncbi_fasta_filename = os.path.join(tmp_dir, "%s%s" % (ncbi_identifier, ext))

        # NOTE: the previous version also opened "<database_id>.fa" in
        # target_directory for writing but never used or closed the handle,
        # leaving an empty stray file behind; that has been dropped.

        tmp_extract_dir = os.path.join(tmp_dir, 'extracted_fasta')
        os.mkdir(tmp_extract_dir)

        tmp_fasta = open(ncbi_fasta_filename, 'wb+')
        try:
            ftp.retrbinary('RETR %s' % ncbi_file_name, tmp_fasta.write)

            # Rewind so the reader factory sees the archive from its start.
            tmp_fasta.flush()
            tmp_fasta.seek(0)

            fasta_readers = get_stream_reader(tmp_fasta, tmp_extract_dir)
            try:
                data_table_entry = _stream_fasta_to_file(fasta_readers, target_directory, database_id, database_name, params)
            finally:
                for fasta_reader in fasta_readers:
                    fasta_reader.close()
        finally:
            tmp_fasta.close()

        _add_data_table_entry(data_manager_dict, data_table_entry)
    finally:
        # Release the connection and the scratch directory on success and failure alike.
        ftp.close()
        cleanup_before_exit(tmp_dir)
0 | 105 |
106 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def download_from_url(data_manager_dict, params, target_directory, database_id, database_name):
    """Fetch FASTA data from one or more URLs and build a database from it.

    :param data_manager_dict: dict updated in place with the new table entry.
    :param params: dict; ``params['param_dict']['reference_source']['user_url']``
        is a newline-separated string of URLs. Blank lines are ignored.
    :param target_directory: directory the database is written to.
    :param database_id: value used for the entry and the output file name.
    :param database_name: display name stored in the entry.
    """
    # Imported locally so this function works on both Python 3 and Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    # TODO: we should automatically do decompression here
    raw_urls = params['param_dict']['reference_source']['user_url']
    urls = [url.strip() for url in raw_urls.split('\n') if url.strip()]

    fasta_reader = []
    try:
        for url in urls:
            fasta_reader.append(urlopen(url))
        data_table_entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    finally:
        # Make sure no response stays open if a later URL or the build fails.
        for response in fasta_reader:
            response.close()

    _add_data_table_entry(data_manager_dict, data_table_entry)
0 | 114 |
115 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def download_from_history(data_manager_dict, params, target_directory, database_id, database_name):
    """Build a database from FASTA file(s) referenced in the job parameters.

    :param data_manager_dict: dict updated in place with the new table entry.
    :param params: dict; ``params['param_dict']['reference_source']
        ['input_fasta']`` is either one file path or a list of file paths.
    :param target_directory: directory the database is written to.
    :param database_id: value used for the entry and the output file name.
    :param database_name: display name stored in the entry.
    """
    # TODO: allow multiple FASTA input files
    input_filename = params['param_dict']['reference_source']['input_fasta']
    if isinstance(input_filename, list):
        fasta_reader = [open(filename, 'rb') for filename in input_filename]
    else:
        # Binary mode, matching the list branch and the binary writer downstream.
        fasta_reader = open(input_filename, 'rb')

    data_table_entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    _add_data_table_entry(data_manager_dict, data_table_entry)
0 | 126 |
127 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def copy_from_directory(data_manager_dict, params, target_directory, database_id, database_name):
    """Register a FASTA file from the filesystem, by symlink or by building a database.

    When ``create_symlink`` equals ``'create_symlink'`` the input is only
    symlinked into ``target_directory``; otherwise its contents are streamed
    through ``_stream_fasta_to_file``.

    :param data_manager_dict: dict updated in place with the new table entry.
    :param params: dict; under ``params['param_dict']['reference_source']`` the
        keys ``fasta_filename`` (path or list of paths) and ``create_symlink``
        are read.
    :param target_directory: directory the result is written to.
    :param database_id: value used for the entry and the output file name.
    :param database_name: display name stored in the entry.
    """
    reference_source = params['param_dict']['reference_source']
    input_filename = reference_source['fasta_filename']
    create_symlink = reference_source['create_symlink'] == 'create_symlink'
    if create_symlink:
        data_table_entry = _create_symlink(input_filename, target_directory, database_id, database_name)
    else:
        if isinstance(input_filename, list):
            fasta_reader = [open(filename, 'rb') for filename in input_filename]
        else:
            # Binary mode, matching the list branch and the binary writer downstream.
            fasta_reader = open(input_filename, 'rb')
        data_table_entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    _add_data_table_entry(data_manager_dict, data_table_entry)
0 | 140 |
141 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
142 def _add_data_table_entry(data_manager_dict, data_table_entry): |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
143 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
144 data_manager_dict['data_tables']['diamond_database'] = data_manager_dict['data_tables'].get('diamond_database', []) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
145 data_manager_dict['data_tables']['diamond_database'].append(data_table_entry) |
0 | 146 return data_manager_dict |
147 | |
148 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
def _stream_fasta_to_file(fasta_stream, target_directory, database_id, database_name, params, close_stream=True):
    """Concatenate FASTA stream(s) into a temp file and run ``diamond makedb`` on it.

    :param fasta_stream: one readable object or a list of them. When several
        streams are given, a newline is inserted between two streams if the
        previous one did not end with a line break.
    :param target_directory: working directory for ``diamond``; the database
        path passed via ``--db`` is ``<target_directory>/<database_id>.fa``.
    :param database_id: value used for the entry and the output file name.
    :param database_name: display name stored in the entry.
    :param params: not used by this function.
    :param close_stream: close each input stream once it has been consumed.
    :returns: dict with keys ``value``, ``name`` and ``db_path``, the latter
        being ``"<database_id>.fa.dmnd"`` (presumably the file name diamond
        produces for the given ``--db`` value; verify against diamond).
    :raises SystemExit: with diamond's return code if the build fails; its
        captured stderr is echoed to ``sys.stderr`` first.
    """
    fasta_base_filename = "%s.fa" % database_id
    fasta_filename = os.path.join(target_directory, fasta_base_filename)

    # Treat the single-stream case as a one-element list so one loop serves both.
    if isinstance(fasta_stream, list):
        streams = fasta_stream
    else:
        streams = [fasta_stream]

    temp_fasta = tempfile.NamedTemporaryFile(delete=False, suffix=".fasta")
    try:
        last_char = None
        for fh in streams:
            # Keep records from consecutive inputs on separate lines.
            if last_char not in (None, b'\n', b'\r'):
                temp_fasta.write(b'\n')
            while True:
                data = fh.read(CHUNK_SIZE)
                if not data:
                    break
                if not isinstance(data, bytes):
                    # Text-mode reader: the temp file is binary, so encode.
                    data = data.encode('utf-8')
                temp_fasta.write(data)
                # Slice (not index) so the result is bytes on Python 3 as well.
                last_char = data[-1:]
            if close_stream:
                fh.close()
        temp_fasta.close()

        args = ['diamond', 'makedb', '--in', temp_fasta.name, '--db', fasta_filename]

        tmp_stderr = tempfile.NamedTemporaryFile(prefix="tmp-data-manager-diamond-database-builder-stderr")
        try:
            proc = subprocess.Popen(args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno())
            return_code = proc.wait()
            if return_code:
                tmp_stderr.flush()
                tmp_stderr.seek(0)
                sys.stderr.write("Error building diamond database:\n")
                while True:
                    chunk = tmp_stderr.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    if not isinstance(chunk, str):
                        # Python 3: the temp file yields bytes, sys.stderr wants text.
                        chunk = chunk.decode('utf-8', 'replace')
                    sys.stderr.write(chunk)
                sys.exit(return_code)
        finally:
            tmp_stderr.close()
    finally:
        # Runs on success, on error and on sys.exit(): never leave the temp FASTA behind.
        temp_fasta.close()
        if os.path.exists(temp_fasta.name):
            os.remove(temp_fasta.name)

    return dict(value=database_id, name=database_name, db_path="%s.dmnd" % fasta_base_filename)
0 | 204 |
205 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
206 def _create_symlink(input_filename, target_directory, database_id, database_name): |
0 | 207 fasta_base_filename = "%s.fa" % database_id |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
208 fasta_filename = os.path.join(target_directory, fasta_base_filename) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
209 os.symlink(input_filename, fasta_filename) |
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
210 return dict(value=database_id, name=database_name, db_path=fasta_base_filename) |
0 | 211 |
212 | |
1
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
# Dispatch table: maps the 'reference_source_selector' value from the tool
# parameters to the function that obtains the FASTA for that source.
REFERENCE_SOURCE_TO_DOWNLOAD = {
    "ncbi": download_from_ncbi,
    "url": download_from_url,
    "history": download_from_history,
    "directory": copy_from_directory,
}
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
214 |
0 | 215 |
def main():
    """Command-line entry point of the data manager.

    Expects one positional argument: the path to a JSON file holding the
    job parameters. That file is read for input and then overwritten with
    the resulting data manager dictionary (serialized with sorted keys).

    The handler selected through
    ``params['param_dict']['reference_source']['reference_source_selector']``
    is called with ``(data_manager_dict, params, target_directory,
    database_id, database_name)``; it is expected to fill
    ``data_manager_dict`` in place, since its return value is not used here.

    Exits via ``parser.error`` (status 2) when the positional argument is
    missing. Raises KeyError when a required parameter or an unknown
    reference source is encountered.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    # The option is accepted for command-line compatibility; its value is
    # not used by this function.
    parser.add_option('-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description')
    _options, args = parser.parse_args()

    if not args:
        # Report a usage error instead of failing with a bare IndexError.
        parser.error('missing path to the JSON parameter file')
    filename = args[0]

    # Close the handle deterministically instead of relying on refcounting.
    with open(filename) as params_handle:
        params = json.load(params_handle)

    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    param_dict = params['param_dict']
    database_id = param_dict['database_id']
    database_name = param_dict['database_name']

    # Fetch the FASTA
    source = param_dict['reference_source']['reference_source_selector']
    fetch = REFERENCE_SOURCE_TO_DOWNLOAD[source]
    fetch(data_manager_dict, params, target_directory, database_id, database_name)

    # save info to json file
    # The context manager guarantees the data is flushed and the file closed.
    with open(filename, 'w') as out_handle:
        json.dump(data_manager_dict, out_handle, sort_keys=True)
5a0d0bee4f8d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents:
0
diff
changeset
|
237 |
0 | 238 |
# Run the data manager only when executed as a script, not when imported.
if "__main__" == __name__:
    main()