Mercurial > repos > rhohensinner > galaxy_irods_interface
annotate irods_upload.py @ 2:0641ea2f75b1 draft
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
author | rhohensinner |
---|---|
date | Fri, 02 Jul 2021 09:40:25 +0000 |
parents | |
children | d2be2eb8350f |
rev | line source |
---|---|
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
2 # Processes uploads from the user. |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
3 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
4 # WARNING: Changes in this tool (particularly as related to parsing) may need |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
5 # to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
6 from __future__ import print_function |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
7 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
8 import errno |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
9 import os |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
10 import shutil |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
11 import sys |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
12 sys.path.append("/home/richard/galaxy/lib") |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
13 from json import dump, load, loads |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
14 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
15 from galaxy.datatypes import sniff |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
16 from galaxy.datatypes.registry import Registry |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
17 from galaxy.datatypes.upload_util import handle_upload, UploadProblemException |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
18 from galaxy.util import ( |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
19 bunch, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
20 safe_makedirs, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
21 unicodify |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
22 ) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
23 from galaxy.util.compression_utils import CompressedFile |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
24 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
25 assert sys.version_info[:2] >= (2, 7) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
26 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
27 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
28 _file_sources = None |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
29 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
30 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def get_file_sources():
    """Return the shared ``ConfiguredFileSources`` instance, building it on first use.

    When ``file_sources.json`` exists in the current working directory and
    holds a non-null JSON value, the instance is built from that value.
    Otherwise it is built from an empty list. The instance is stored in the
    module-level ``_file_sources`` so later calls reuse it.

    :returns: the cached ``ConfiguredFileSources`` instance.
    """
    global _file_sources
    if _file_sources is None:
        from galaxy.files import ConfiguredFileSources

        file_sources = None
        if os.path.exists("file_sources.json"):
            with open("file_sources.json") as f:
                file_sources_as_dict = load(f)
            # A JSON ``null`` document is treated like a missing file.
            if file_sources_as_dict is not None:
                file_sources = ConfiguredFileSources.from_dict(file_sources_as_dict)
        if file_sources is None:
            # Keep the fallback instance: without this assignment the function
            # would return None and the cache would never be filled.
            # NOTE(review): the ``[]`` argument is unchanged from the original
            # call - confirm it yields an empty configuration in the installed
            # Galaxy version.
            file_sources = ConfiguredFileSources.from_dict([])
        _file_sources = file_sources
    return _file_sources
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
46 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
47 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def file_err(msg, dataset):
    """Build the failure record for ``dataset`` and clean up its uploaded file.

    The file at ``dataset.path`` is deleted on a best-effort basis unless the
    dataset type is ``server_dir`` or ``path_paste``.

    :param msg: error text stored under the ``stderr`` key of the result.
    :param dataset: object exposing ``type``, ``dataset_id`` and (optionally)
        ``path`` attributes.
    :returns: dict with ``type``, ``ext``, ``dataset_id``, ``stderr`` and
        ``failed`` keys.
    """
    # never remove a server-side upload
    if dataset.type not in ('server_dir', 'path_paste'):
        path = getattr(dataset, 'path', None)
        if path:
            try:
                os.remove(path)
            except (OSError, TypeError, ValueError):
                # Best-effort cleanup: a missing, unremovable or malformed
                # path must not mask the upload error being reported.
                pass
    return dict(type='dataset',
                ext='data',
                dataset_id=dataset.dataset_id,
                stderr=msg,
                failed=True)
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
60 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
61 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def safe_dict(d):
    """Deep-copy a JSON-like value, passing every dictionary key through ``unicodify``.

    Lists and dictionaries are rebuilt recursively; any other value is
    returned as-is (not copied).
    """
    if isinstance(d, list):
        return [safe_dict(item) for item in d]
    if not isinstance(d, dict):
        return d
    cloned = {}
    for key, value in d.items():
        cloned[unicodify(key)] = safe_dict(value)
    return cloned
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
70 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
71 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def parse_outputs(args):
    """Parse ``<id>:<files_path>:<path>`` output specifications.

    Only the first two colons are separators, so ``path`` itself may contain
    colons.

    :param args: iterable of specification strings.
    :returns: dict mapping the integer id to a ``(path, files_path)`` tuple.
    :raises ValueError: if a specification has fewer than three fields or its
        id is not an integer.
    """
    rval = {}
    for arg in args:
        fields = arg.split(':', 2)
        if len(fields) != 3:
            raise ValueError(
                "Malformed output specification %r, expected '<id>:<files_path>:<path>'" % (arg,)
            )
        dataset_id, files_path, path = fields
        rval[int(dataset_id)] = (path, files_path)
    return rval
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
78 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
79 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_file(dataset, registry, output_path):
    """Process a single (non-composite) upload and place it at ``output_path``.

    A ``url`` upload is first streamed to a local file. The file is then passed
    to ``handle_upload``; depending on ``link_data_only`` it is either left
    where it is or moved/copied to ``output_path`` and groomed if its datatype
    requires it.

    :param dataset: upload request entry. Read through attribute access
        (``type``, ``path``, ``name``, ``file_type``, ``dataset_id``,
        ``to_posix_lines``, ``space_to_tab``) and through ``.get()`` for the
        optional flags. ``dataset.path`` is reassigned for ``url`` uploads.
    :param registry: datatypes registry forwarded to ``handle_upload``.
    :param output_path: destination path of the dataset file.
    :returns: dict with ``type``, ``dataset_id``, ``ext``, ``stdout``, ``name``,
        ``line_count`` (always ``None``) and, when supplied, ``uuid``.
    :raises UploadProblemException: for an invalid ``link_data_only`` value, a
        missing ``file_type``, a failed URL fetch, a missing upload file, or a
        linked file that would need grooming.
    """
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ('link_to_files', 'copy_files'):
        raise UploadProblemException("Invalid setting '%s' for option link_data_only - upload request misconfigured" % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Base on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)

    # Fail early with a readable message when the request carries no file_type.
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, unicodify(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, _is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        # datatype is guarded against None, as in the grooming check further down.
        if datatype and datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    elif purge_source or converted_path:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        try:
            # If user has indicated that the original file to be purged and have converted_path tempfile
            if purge_source and converted_path:
                shutil.move(converted_path, output_path)
                os.remove(dataset.path)
            else:
                shutil.move(converted_path or dataset.path, output_path)
        except OSError as e:
            # We may not have permission to remove the input
            if e.errno != errno.EACCES:
                raise
    else:
        shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                # No line count is computed in this function.
                line_count=None)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
190 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
191 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_composite_file(dataset, registry, output_path, files_path):
    """Stage the component files of a composite upload and place its primary file.

    The datatype is looked up in ``registry`` from ``dataset.file_type``.  Every
    component listed in ``dataset.composite_files`` (or, failing that, every
    entry of ``dataset.composite_file_paths``) is staged into ``files_path``;
    afterwards ``dataset.primary_file`` is moved to ``output_path``.

    Returns a dict with the keys ``type``, ``dataset_id`` and ``stdout``.
    Raises ``UploadProblemException`` when a URL cannot be fetched, when a
    declared component has no entry in ``dataset.composite_file_paths``, or
    when a non-optional component was not provided.
    """
    # Find data type
    if dataset.file_type is not None:
        datatype = registry.get_datatype_by_extension(dataset.file_type)
    else:
        # NOTE(review): with no datatype, ``datatype.composite_type`` in
        # stage_file() below would raise AttributeError -- confirm callers
        # always supply a file_type for composite uploads.
        datatype = None

    def to_path(path_or_url):
        # Returns (local_path, was_url).  Anything containing '://' is treated
        # as a URL and fetched to a local file first.
        if path_or_url.find('://') == -1:  # todo fixme
            return path_or_url, False
        try:
            fetched = sniff.stream_url_to_file(path_or_url, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))
        return fetched, True

    def stage_file(name, composite_file_path, is_binary=False):
        source, was_url = to_path(composite_file_path['path'])
        if was_url:
            # Remember where the fetched copy lives on the dataset itself.
            dataset.path = source

        wants_decompress = composite_file_path.get('auto_decompress', True)
        if wants_decompress and not datatype.composite_type and CompressedFile.can_decompress(source):
            # The datatype is not explicitly composite, so the archive only
            # carries extra files to attach as composite data.  It would be
            # nicer if Galaxy told the tool this more explicitly, so we would
            # not have to dispatch on the datatype and could attach arbitrary
            # extra composite data to an existing composite datatype if need
            # be.  Perhaps that would be a mistake though.
            CompressedFile(source).extract(files_path)
            return

        sniff.handle_composite_file(
            datatype,
            source,
            files_path,
            name,
            is_binary,
            output_adjacent_tmpdir(output_path),
            'data_id_%s_convert_' % dataset.dataset_id,
            composite_file_path,
        )

    if dataset.composite_files:
        # Pre-defined composite files from the datatype definition.
        safe_makedirs(files_path)
        for name, raw_spec in dataset.composite_files.items():
            spec = bunch.Bunch(**raw_spec)
            if spec.name not in dataset.composite_file_paths:
                raise UploadProblemException("Failed to find file_path %s in %s" % (spec.name, dataset.composite_file_paths))
            supplied = dataset.composite_file_paths[spec.name]
            if supplied is None:
                if not spec.optional:
                    raise UploadProblemException('A required composite data file was not provided (%s)' % name)
                # Optional component that was left out: nothing to stage.
                continue
            stage_file(name, supplied, spec.is_binary)
    elif dataset.composite_file_paths:
        # Ad-hoc, user supplied composite files.
        safe_makedirs(files_path)
        for key, composite_file in dataset.composite_file_paths.items():
            stage_file(key, composite_file)  # TODO: replace these defaults

    # Move the dataset to its "real" path
    primary_file_path = to_path(dataset.primary_file)[0]
    shutil.move(primary_file_path, output_path)

    # Write the job info
    return {
        'type': 'dataset',
        'dataset_id': dataset.dataset_id,
        'stdout': 'uploaded %s file' % dataset.file_type,
    }
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
270 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
271 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_paramfile(path):
    """Load a new-style paramfile and return its top-level list.

    The file at ``path`` is parsed as a single document with ``load``
    (presumably ``json.load`` -- confirm against the file's imports).

    Raises ``ValueError`` when the parsed document is not a list.  The check
    is an explicit ``raise`` rather than an ``assert`` so that it is still
    enforced when Python runs with ``-O``, which strips assert statements.
    """
    with open(path) as fh:
        obj = load(fh)
    # If there's a single dataset in an old-style paramfile it'll still parse,
    # but it'll be a dict -- reject it so the caller can fall back to the
    # old-style reader.
    if not isinstance(obj, list):
        raise ValueError('paramfile %s does not contain a list of datasets' % path)
    return obj
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
278 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
279 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_old_paramfile(path):
    """Read an old-style paramfile: one serialized dataset per line.

    Each non-blank line of ``path`` is parsed with ``loads`` and the results
    are returned as a list, in file order.  Blank or whitespace-only lines are
    skipped instead of being handed to the parser, where they would raise
    ``ValueError``.
    """
    with open(path) as fh:
        return [loads(line) for line in fh if line.strip()]
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
286 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
287 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __write_job_metadata(metadata):
    """Write every entry of ``metadata`` to ``galaxy.json``, one per line.

    The file is created (or truncated) in the current working directory; each
    entry is serialized with ``dump`` and followed by a newline.
    """
    # TODO: make upload/set_metadata compatible with https://github.com/galaxyproject/galaxy/pull/4437
    with open('galaxy.json', 'w') as job_metadata_file:
        for entry in metadata:
            dump(entry, job_metadata_file)
            job_metadata_file.write('\n')
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
294 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
295 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def output_adjacent_tmpdir(output_path):
    """ Return the directory that contains output_path.

    Temp files that will ultimately be moved to output_path anyway can be
    created directly in that directory so shutil.move will work optimally.
    """
    parent_dir, _basename = os.path.split(output_path)
    return parent_dir
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
302 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
303 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __main__():
    """Command-line entry point for the upload tool.

    Expects ``<root> <datatypes_conf> <json paramfile> <output spec> ...`` in
    ``sys.argv``.  Loads the datatypes registry, reads the dataset
    descriptions from the paramfile (falling back to the old-style reader when
    the new-style one rejects the file), processes each dataset and finally
    writes the collected per-dataset results via ``__write_job_metadata``.

    Exits with status 1 when too few arguments are given or when a dataset
    has no matching output path on the command line.
    """
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    root_dir, datatypes_conf, paramfile = sys.argv[1:4]
    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=root_dir, config=datatypes_conf)

    try:
        datasets = __read_paramfile(paramfile)
    except (ValueError, AssertionError):
        # Not a new-style paramfile; try the one-dataset-per-line format.
        datasets = __read_old_paramfile(paramfile)

    metadata = []
    for raw_dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(raw_dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type != 'composite':
                result = add_file(dataset, registry, output_path)
            else:
                files_path = output_paths[int(dataset.dataset_id)][1]
                result = add_composite_file(dataset, registry, output_path, files_path)
        except UploadProblemException as e:
            # Record the failure for this dataset and carry on with the rest.
            result = file_err(unicodify(e), dataset)
        metadata.append(result)
    __write_job_metadata(metadata)
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
337 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
338 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()