Mercurial > repos > rhohensinner > galaxy_irods_interface
annotate irods_upload.py @ 3:d2be2eb8350f draft
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
author | rhohensinner |
---|---|
date | Mon, 19 Jul 2021 13:11:45 +0000 |
parents | 0641ea2f75b1 |
children | 84f685c067ad |
rev | line source |
---|---|
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
2 # Processes uploads from the user. |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
3 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
4 # WARNING: Changes in this tool (particularly as related to parsing) may need |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
5 # to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
6 from __future__ import print_function |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
7 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
8 import errno |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
9 import os |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
10 import shutil |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
11 import sys |
3
d2be2eb8350f
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
2
diff
changeset
|
12 import main |
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
13 from json import dump, load, loads |
3
d2be2eb8350f
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
2
diff
changeset
|
14 global python_path |
d2be2eb8350f
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
2
diff
changeset
|
15 sys.path = python_path |
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
16 from galaxy.datatypes import sniff |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
17 from galaxy.datatypes.registry import Registry |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
18 from galaxy.datatypes.upload_util import handle_upload, UploadProblemException |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
19 from galaxy.util import ( |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
20 bunch, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
21 safe_makedirs, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
22 unicodify |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
23 ) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
24 from galaxy.util.compression_utils import CompressedFile |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
25 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
26 assert sys.version_info[:2] >= (2, 7) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
27 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
28 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
29 _file_sources = None |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
30 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
31 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def get_file_sources():
    """Return the shared file sources object, building it on first use.

    The result is cached in the module-level ``_file_sources``.  When a
    ``file_sources.json`` file exists in the current working directory, its
    parsed JSON content is handed to ``ConfiguredFileSources.from_dict``.
    When the file is absent, or holds JSON ``null``, the object is built from
    an empty configuration instead.

    :returns: the cached object produced by ``ConfiguredFileSources.from_dict``.
    """
    global _file_sources
    if _file_sources is None:
        # Imported on first use rather than at module import time.
        from galaxy.files import ConfiguredFileSources
        file_sources = None
        if os.path.exists("file_sources.json"):
            with open("file_sources.json") as f:
                file_sources_as_dict = load(f)
            if file_sources_as_dict is not None:
                file_sources = ConfiguredFileSources.from_dict(file_sources_as_dict)
        if file_sources is None:
            # Keep the fallback instance: without the assignment this function
            # would return None and the ``is None`` cache check above would
            # never be satisfied.
            # NOTE(review): the empty-list argument is kept from the original
            # call - confirm it is what from_dict expects for "no sources".
            file_sources = ConfiguredFileSources.from_dict([])
        _file_sources = file_sources
    return _file_sources
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
47 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
48 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def file_err(msg, dataset):
    """Build the failure record for *dataset*, cleaning up its upload first.

    Uploads whose ``dataset.type`` is ``'server_dir'`` or ``'path_paste'``
    are never deleted.  For any other type, removal of ``dataset.path`` is
    attempted and every error raised by that attempt is ignored.

    :param msg: error text stored under the ``stderr`` key.
    :param dataset: object exposing ``type``, ``path`` and ``dataset_id``.
    :returns: dict describing the failed dataset.
    """
    server_side = dataset.type in ('server_dir', 'path_paste')
    if not server_side:
        # Best-effort cleanup: a failed delete must not hide the upload error.
        try:
            os.remove(dataset.path)
        except Exception:
            pass
    return {
        'type': 'dataset',
        'ext': 'data',
        'dataset_id': dataset.dataset_id,
        'stderr': msg,
        'failed': True,
    }
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
61 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
62 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def safe_dict(d):
    """Return a deep copy of a JSON-like structure with unicodified dict keys.

    Lists are copied element by element, dictionaries are rebuilt with every
    key passed through ``unicodify``, and any other value is returned as is.
    """
    if isinstance(d, list):
        return [safe_dict(item) for item in d]
    if not isinstance(d, dict):
        # Scalars and other leaf values are handed back untouched.
        return d
    cloned = {}
    for key, value in d.items():
        new_key = unicodify(key)
        cloned[new_key] = safe_dict(value)
    return cloned
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
71 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
72 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def parse_outputs(args):
    """Parse ``id:files_path:path`` output specifications.

    Each argument is split on its first two colons, so *path* itself may
    contain colons.  When an id occurs more than once the last one wins.

    :param args: iterable of ``"<id>:<files_path>:<path>"`` strings.
    :returns: dict mapping the integer id to a ``(path, files_path)`` tuple.
    """
    fields = (spec.split(':', 2) for spec in args)
    return {
        int(output_id): (path, files_path)
        for output_id, files_path, path in fields
    }
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
79 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
80 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_file(dataset, registry, output_path):
    """Process a single uploaded dataset and place its content at *output_path*.

    URL uploads are fetched first.  The file is then handed to
    ``handle_upload`` and, unless the request asks to link to the file
    instead of copying it, moved or copied to *output_path*.

    :param dataset: upload request, read through ``.get(...)`` and attribute
        access (``type``, ``path``, ``name``, ``file_type``, ``dataset_id``,
        ``to_posix_lines``, ``space_to_tab``).  ``dataset.path`` is
        overwritten for URL uploads.
    :param registry: datatypes registry forwarded to ``handle_upload``.
    :param output_path: destination path for the dataset content.
    :returns: dict with the job info for this dataset (``type``,
        ``dataset_id``, ``ext``, ``stdout``, ``name``, ``line_count`` and,
        when present in the request, ``uuid``).
    :raises UploadProblemException: for an invalid ``link_data_only`` value,
        a missing ``file_type``, a URL that cannot be fetched, a missing
        upload file, or linked data that would need grooming.
    """
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ('link_to_files', 'copy_files'):
        raise UploadProblemException("Invalid setting '%s' for option link_data_only - upload request misconfigured" % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Based on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)

    # Probe the attribute up front so a missing file_type is reported as an upload problem.
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, unicodify(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # NOTE(review): a "strip compression extension from dataset.name" step used
    # to follow here, but it was guarded by a compression_type local that was
    # always None, so it never ran.  Nothing visible here supplies a
    # compression type; reinstate the step once one is available.

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        # datatype is guarded the same way as in the grooming step further down.
        if datatype and datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    elif purge_source or converted_path:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        try:
            if purge_source and converted_path:
                # The converted tempfile becomes the output; the original upload is purged.
                shutil.move(converted_path, output_path)
                os.remove(dataset.path)
            else:
                shutil.move(converted_path or dataset.path, output_path)
        except OSError as e:
            # We may not have permission to remove the input
            if e.errno != errno.EACCES:
                raise
    else:
        shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=None)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
191 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
192 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_composite_file(dataset, registry, output_path, files_path):
    """Stage the component files of a composite upload and place its primary file.

    The datatype is looked up through ``registry`` from ``dataset.file_type``
    (when one is given).  Component files come either from the pre-defined
    ``dataset.composite_files`` or, when those are empty, from the ad-hoc
    ``dataset.composite_file_paths``; each one is staged under ``files_path``.
    Afterwards ``dataset.primary_file`` is moved to ``output_path``.

    Returns a dict with ``type``, ``dataset_id`` and ``stdout`` keys describing
    the upload.  Raises ``UploadProblemException`` when a URL cannot be fetched,
    when a declared composite file has no entry in
    ``dataset.composite_file_paths``, or when a non-optional one is ``None``.
    """
    datatype = None
    if dataset.file_type is not None:
        # Find data type
        datatype = registry.get_datatype_by_extension(dataset.file_type)

    def to_path(path_or_url):
        # Returns (local_path, is_url); anything containing '://' is treated
        # as a URL and streamed to a temporary file first.
        is_url = '://' in path_or_url  # todo fixme
        if not is_url:
            return path_or_url, is_url
        try:
            fetched = sniff.stream_url_to_file(path_or_url, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))
        return fetched, is_url

    def stage_file(name, composite_file_path, is_binary=False):
        source = composite_file_path['path']
        local_path, is_url = to_path(source)
        if is_url:
            # Continue with the downloaded copy instead of the URL.
            dataset.path = local_path
            source = local_path

        auto_decompress = composite_file_path.get('auto_decompress', True)
        if auto_decompress and not datatype.composite_type and CompressedFile.can_decompress(source):
            # Not an explicitly composite datatype, so these are just extra
            # files to attach as composite data. It'd be better if Galaxy
            # communicated this to the tool more explicitly, so we didn't need
            # to dispatch on the datatype and could attach arbitrary extra
            # composite data to an existing composite datatype if need be?
            # Perhaps that would be a mistake though.
            CompressedFile(source).extract(files_path)
            return

        sniff.handle_composite_file(
            datatype,
            source,
            files_path,
            name,
            is_binary,
            output_adjacent_tmpdir(output_path),
            'data_id_%s_convert_' % dataset.dataset_id,
            composite_file_path,
        )

    if dataset.composite_files:
        # Pre-defined composite files from the datatype definition.
        safe_makedirs(files_path)
        for name, value in dataset.composite_files.items():
            spec = bunch.Bunch(**value)
            if spec.name not in dataset.composite_file_paths:
                raise UploadProblemException("Failed to find file_path %s in %s" % (spec.name, dataset.composite_file_paths))
            composite_file_path = dataset.composite_file_paths[spec.name]
            if composite_file_path is None:
                if not spec.optional:
                    raise UploadProblemException('A required composite data file was not provided (%s)' % name)
                # Optional component that was not supplied: nothing to stage.
                continue
            stage_file(name, composite_file_path, spec.is_binary)
    elif dataset.composite_file_paths:
        # Ad-hoc user supplied composite files.
        safe_makedirs(files_path)
        for key, composite_file in dataset.composite_file_paths.items():
            stage_file(key, composite_file)  # TODO: replace these defaults

    # Move the dataset to its "real" path
    primary_file_path, _ = to_path(dataset.primary_file)
    shutil.move(primary_file_path, output_path)

    # Write the job info
    return {
        'type': 'dataset',
        'dataset_id': dataset.dataset_id,
        'stdout': 'uploaded %s file' % dataset.file_type,
    }
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
271 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
272 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_paramfile(path):
    """Load a new-style JSON paramfile and return its list of datasets.

    ``path`` is opened as text and parsed as one JSON document.

    Raises ``ValueError`` when the content is not a single valid JSON document
    and ``AssertionError`` when the document parses but is not a list; the
    caller uses either one as the signal to fall back to the old-style reader.
    """
    with open(path) as fh:
        obj = load(fh)
    # If there's a single dataset in an old-style paramfile it'll still parse,
    # but it'll be a dict. Raise explicitly instead of using an ``assert``
    # statement: asserts are stripped under ``python -O``, which would let the
    # dict through and silently skip the old-style fallback.
    if not isinstance(obj, list):
        raise AssertionError('paramfile does not contain a JSON list (got %s)' % type(obj).__name__)
    return obj
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
279 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
280 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_old_paramfile(path):
    """Parse an old-style paramfile holding one JSON document per line.

    Returns the decoded documents as a list, in file order.
    """
    with open(path) as fh:
        return [loads(record) for record in fh]
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
287 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
288 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __write_job_metadata(metadata):
    """Write each entry of ``metadata`` as one JSON line to ``galaxy.json``.

    The file is created (or truncated) in the current working directory.
    """
    # TODO: make upload/set_metadata compatible with https://github.com/galaxyproject/galaxy/pull/4437
    with open('galaxy.json', 'w') as job_file:
        for entry in metadata:
            dump(entry, job_file)
            # One JSON document per line.
            job_file.write('\n')
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
295 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
296 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def output_adjacent_tmpdir(output_path):
    """Return the directory that contains ``output_path``.

    Temp files that will end up at ``output_path`` anyway are created right
    next to it, so the later ``shutil.move`` works optimally.
    """
    parent_dir, _ = os.path.split(output_path)
    return parent_dir
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
303 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
304 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __main__():
    """Command-line entry point of the upload tool.

    Reads its inputs from ``sys.argv``:

    * ``argv[1]`` / ``argv[2]`` - root directory and datatypes config handed to
      ``Registry.load_datatypes``.
    * ``argv[3]`` - JSON paramfile describing the datasets (new-style list,
      with a fallback to the old one-document-per-line format).
    * ``argv[4:]`` - output specs, parsed by ``parse_outputs``.
    * ``argv[5]`` - when present, also appended to ``sys.path``.

    Every dataset is processed with ``add_composite_file`` or ``add_file`` and
    the collected results are written by ``__write_job_metadata``.  Exits with
    status 1 on a usage error or when a dataset has no output path.
    """
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    # Fixed: this was a bare ``argv[5]``, while the rest of the function only
    # ever uses ``sys.argv``.  The index is also checked, because the usage
    # guard above only guarantees four entries.
    # NOTE(review): argv[5] is also part of the argv[4:] slice handed to
    # parse_outputs below - confirm parse_outputs copes with that entry.
    if len(sys.argv) > 5:
        sys.path.append(sys.argv[5])
    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])

    try:
        datasets = __read_paramfile(sys.argv[3])
    except (ValueError, AssertionError):
        # Not a single JSON list: fall back to the old-style paramfile format.
        datasets = __read_old_paramfile(sys.argv[3])

    metadata = []
    for dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                # The second element of the output entry is the extra-files directory.
                files_path = output_paths[int(dataset.dataset_id)][1]
                metadata.append(add_composite_file(dataset, registry, output_path, files_path))
            else:
                metadata.append(add_file(dataset, registry, output_path))
        except UploadProblemException as e:
            # Record the failure for this dataset and keep processing the rest.
            metadata.append(file_err(unicodify(e), dataset))
    __write_job_metadata(metadata)
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
339 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
340 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
# Run the upload tool only when this file is executed as a script.
if __name__ == '__main__':
    __main__()