annotate irods_upload.py @ 4:84f685c067ad draft default tip

"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
author rhohensinner
date Wed, 04 Aug 2021 12:01:55 +0000
parents d2be2eb8350f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
1 #!/usr/bin/env python
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
2 # Processes uploads from the user.
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
3
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
4 # WARNING: Changes in this tool (particularly as related to parsing) may need
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
5 # to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
6 from __future__ import print_function
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
7
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
8 import errno
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
9 import os
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
10 import shutil
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
11 import sys
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
12 from json import dump, load, loads
4
84f685c067ad "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents: 3
diff changeset
# Extend the module search path before the galaxy imports below.
# "python__path.txt" is read from the current working directory and is
# expected to hold comma-separated directory entries.
with open("python__path.txt", "r") as pp:
    ppstr = pp.read()
# The field after the last comma is dropped (presumably the file is written
# with a trailing comma -- TODO confirm against whatever generates it).
temp = ppstr.split(",")[:-1]
sys.path.extend(temp)
84f685c067ad "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents: 3
diff changeset
18
2
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
19 from galaxy.datatypes import sniff
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
20 from galaxy.datatypes.registry import Registry
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
21 from galaxy.datatypes.upload_util import handle_upload, UploadProblemException
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
22 from galaxy.util import (
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
23 bunch,
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
24 safe_makedirs,
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
25 unicodify
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
26 )
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
27 from galaxy.util.compression_utils import CompressedFile
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
28
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
29 assert sys.version_info[:2] >= (2, 7)
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
30
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
31
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
32 _file_sources = None
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
33
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
34
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def get_file_sources():
    """Return the configured file sources, building and caching them once.

    If ``file_sources.json`` exists in the current working directory, its
    parsed JSON content is passed to ``ConfiguredFileSources.from_dict``.
    When the file is absent, or contains JSON ``null``, the file sources are
    built from an empty list instead.

    The result is stored in the module-level ``_file_sources`` so later calls
    return the same object.
    """
    global _file_sources
    if _file_sources is None:
        # Imported only when the file sources are first requested.
        from galaxy.files import ConfiguredFileSources
        file_sources = None
        if os.path.exists("file_sources.json"):
            with open("file_sources.json") as f:
                file_sources_as_dict = load(f)
            if file_sources_as_dict is not None:
                file_sources = ConfiguredFileSources.from_dict(file_sources_as_dict)
        if file_sources is None:
            # Keep the fallback object: previously the result of this call was
            # discarded, so None was returned and nothing was ever cached.
            file_sources = ConfiguredFileSources.from_dict([])
        _file_sources = file_sources
    return _file_sources
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
50
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
51
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def file_err(msg, dataset):
    """Build the failure record for ``dataset`` and clean up its upload file.

    The file at ``dataset.path`` is deleted (best effort) unless the dataset
    type is ``server_dir`` or ``path_paste``.

    :param msg: error text stored under ``stderr`` in the result.
    :param dataset: object exposing ``type``, ``path`` and ``dataset_id``.
    :returns: dict with ``type``, ``ext``, ``dataset_id``, ``stderr`` and
        ``failed`` keys.
    """
    # never remove a server-side upload
    protected_types = ('server_dir', 'path_paste')
    if dataset.type not in protected_types:
        try:
            os.remove(dataset.path)
        except Exception:
            # Cleanup is best effort; reporting the failure matters more.
            pass
    return {
        'type': 'dataset',
        'ext': 'data',
        'dataset_id': dataset.dataset_id,
        'stderr': msg,
        'failed': True,
    }
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
64
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
65
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def safe_dict(d):
    """Return a recursive copy of a JSON-like structure with unicodified keys.

    Dictionaries are rebuilt with every key passed through ``unicodify`` and
    every value cloned recursively; lists are rebuilt element by element;
    any other value is returned unchanged.
    """
    if isinstance(d, list):
        return [safe_dict(item) for item in d]
    if isinstance(d, dict):
        cloned = {}
        for key, value in d.items():
            cloned[unicodify(key)] = safe_dict(value)
        return cloned
    return d
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
74
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
75
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def parse_outputs(args):
    """Parse ``id:files_path:path`` output specifications.

    Each argument is split on its first two colons, so ``path`` may itself
    contain colons.

    :param args: iterable of ``"<id>:<files_path>:<path>"`` strings.
    :returns: dict mapping the integer id to a ``(path, files_path)`` tuple.
    """
    fields = (spec.split(':', 2) for spec in args)
    return {
        int(dataset_id): (path, files_path)
        for dataset_id, files_path, path in fields
    }
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
82
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
83
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def add_file(dataset, registry, output_path):
    """Process one uploaded dataset and place its content at ``output_path``.

    A ``url`` upload is first fetched to a local file. The file is then passed
    to ``handle_upload`` and, unless the request links to the data in place,
    moved or copied to ``output_path`` and groomed if its datatype asks for it.

    :param dataset: upload request for one dataset. It is read both through
        attributes (``type``, ``path``, ``name``, ``file_type``,
        ``dataset_id``, ``to_posix_lines``, ``space_to_tab``) and through
        ``.get()`` for optional flags. ``path`` and ``name`` may be updated.
    :param registry: datatypes registry forwarded to ``handle_upload``.
    :param output_path: destination path for the dataset content.
    :returns: dict with ``type``, ``dataset_id``, ``ext``, ``stdout``,
        ``name`` and ``line_count`` keys, plus ``uuid`` when the request
        carries one.
    :raises UploadProblemException: for an invalid ``link_data_only`` value,
        a missing ``file_type``, a URL that cannot be fetched, a missing
        upload file, or linked data that would need grooming.
    """
    ext = None
    # NOTE(review): compression_type is never reassigned in this function, so
    # the "strip compression extension" step below currently never triggers.
    compression_type = None
    # line_count is not computed here; it is always reported as None.
    line_count = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException("Invalid setting '%s' for option link_data_only - upload request misconfigured" % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Base on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        # Attribute access only; used to detect a request without file_type.
        dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path, file_sources=get_file_sources())
        except Exception as e:
            # dataset.path still holds the URL here because the assignment failed.
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, unicodify(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Strip compression extension from name
    if compression_type and not getattr(datatype, 'compressed', False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        # Guard against a missing datatype, as the grooming step further down
        # does, instead of failing with AttributeError on None.
        if datatype and datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = (
                'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be '
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            )
            raise UploadProblemException(err_msg)
    else:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                # If user has indicated that the original file to be purged and have converted_path tempfile
                if purge_source and converted_path:
                    shutil.move(converted_path, output_path)
                    os.remove(dataset.path)
                else:
                    shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
194
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
195
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def add_composite_file(dataset, registry, output_path, files_path):
    """ Stage the extra files of a composite dataset and move its primary file.

    Reads ``file_type``, ``composite_files``, ``composite_file_paths``,
    ``primary_file`` and ``dataset_id`` from ``dataset``, and sets
    ``dataset.path`` whenever a composite file had to be fetched from a URL.
    Extra files are staged under ``files_path``; the primary file is moved to
    ``output_path``.

    Returns a dict with the keys ``type``, ``dataset_id`` and ``stdout``.

    Raises UploadProblemException when a URL cannot be fetched, when a declared
    composite file has no entry in ``dataset.composite_file_paths``, or when a
    non-optional composite file was supplied as None.
    """
    # Look the datatype up by extension; it stays None when no file_type is set.
    datatype = None
    if dataset.file_type is not None:
        datatype = registry.get_datatype_by_extension(dataset.file_type)

    def to_path(path_or_url):
        # Returns (local_path, was_url); URLs are streamed to a local file first.
        if '://' not in path_or_url:  # todo fixme
            return path_or_url, False
        try:
            fetched_path = sniff.stream_url_to_file(path_or_url, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))
        return fetched_path, True

    def stage_file(name, composite_file_path, is_binary=False):
        local_path, was_url = to_path(composite_file_path['path'])
        if was_url:
            dataset.path = local_path

        wants_decompress = composite_file_path.get('auto_decompress', True)
        if wants_decompress and not datatype.composite_type and CompressedFile.can_decompress(local_path):
            # The datatype is not explicitly composite, so these are just extra
            # files to attach as composite data. It would be better if Galaxy
            # communicated this to the tool more explicitly, so that we did not
            # need to dispatch on the datatype and could attach arbitrary extra
            # composite data to an existing composite datatype if need be.
            # Perhaps that would be a mistake though.
            CompressedFile(local_path).extract(files_path)
            return

        sniff.handle_composite_file(
            datatype,
            local_path,
            files_path,
            name,
            is_binary,
            output_adjacent_tmpdir(output_path),
            'data_id_%s_convert_' % dataset.dataset_id,
            composite_file_path,
        )

    if dataset.composite_files:
        # Pre-defined composite files coming from the datatype definition.
        safe_makedirs(files_path)
        for name, spec in dataset.composite_files.items():
            spec = bunch.Bunch(**spec)
            if spec.name not in dataset.composite_file_paths:
                raise UploadProblemException("Failed to find file_path %s in %s" % (spec.name, dataset.composite_file_paths))
            supplied = dataset.composite_file_paths[spec.name]
            if supplied is not None:
                stage_file(name, supplied, spec.is_binary)
            elif not spec.optional:
                raise UploadProblemException('A required composite data file was not provided (%s)' % name)
    elif dataset.composite_file_paths:
        # Ad-hoc, user supplied composite files.
        safe_makedirs(files_path)
        for key, composite_file in dataset.composite_file_paths.items():
            stage_file(key, composite_file)  # TODO: replace these defaults

    # Move the dataset to its "real" path
    primary_file_path, _ = to_path(dataset.primary_file)
    shutil.move(primary_file_path, output_path)

    # Write the job info
    return {
        'type': 'dataset',
        'dataset_id': dataset.dataset_id,
        'stdout': 'uploaded %s file' % dataset.file_type,
    }
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
274
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
275
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def __read_paramfile(path):
    """ Load a new-style JSON paramfile and return its top-level list.

    Raises AssertionError when the parsed document is not a list; errors from
    opening the file or from ``load`` propagate unchanged. ``__main__`` in this
    file catches ValueError and AssertionError and falls back to
    ``__read_old_paramfile``.
    """
    with open(path) as fh:
        obj = load(fh)
    # If there's a single dataset in an old-style paramfile it'll still parse,
    # but it'll be a dict. Raise explicitly rather than use an ``assert``
    # statement so the check is still performed when Python runs with -O.
    if not isinstance(obj, list):
        raise AssertionError('paramfile %s does not contain a JSON list' % path)
    return obj
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
282
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
283
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def __read_old_paramfile(path):
    """ Parse an old-style paramfile holding one JSON document per line.

    Returns the parsed documents as a list, in file order. Whitespace-only
    lines (for example a trailing blank line) are skipped instead of being
    handed to ``loads``, which would reject them. Errors from ``loads`` on any
    other line propagate unchanged.
    """
    with open(path) as fh:
        return [loads(line) for line in fh if line.strip()]
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
290
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
291
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def __write_job_metadata(metadata):
    """ Write every entry of ``metadata`` to ``galaxy.json``, one JSON document per line.

    The file is created (or overwritten) in the current working directory.
    """
    # TODO: make upload/set_metadata compatible with https://github.com/galaxyproject/galaxy/pull/4437
    with open('galaxy.json', 'w') as job_file:
        for entry in metadata:
            dump(entry, job_file)
            job_file.write('\n')
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
298
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
299
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def output_adjacent_tmpdir(output_path):
    """ Return the directory part of ``output_path``.

    Temp files that will ultimately be moved to ``output_path`` are meant to
    be created directly in that directory so shutil.move will work optimally.
    """
    parent_dir, _ = os.path.split(output_path)
    return parent_dir
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
306
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
307
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
def __main__():
    """ Command-line entry point.

    Expects ``<root> <datatypes_conf> <json paramfile> <output spec> ...`` in
    sys.argv. Each dataset described by the paramfile is handed to
    add_composite_file (when its ``type`` is 'composite') or to add_file; the
    collected results are then written with __write_job_metadata.

    Exits with status 1 when too few arguments are given or when the output
    path for a dataset cannot be looked up. An UploadProblemException raised
    for a dataset is recorded through file_err and does not stop the loop.
    """
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    root_dir, datatypes_conf, paramfile = sys.argv[1:4]
    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=root_dir, config=datatypes_conf)

    # Try the new-style (single JSON list) format first, then the old one.
    try:
        datasets = __read_paramfile(paramfile)
    except (ValueError, AssertionError):
        datasets = __read_old_paramfile(paramfile)

    metadata = []
    for raw_dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(raw_dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                files_path = output_paths[int(dataset.dataset_id)][1]
                entry = add_composite_file(dataset, registry, output_path, files_path)
            else:
                entry = add_file(dataset, registry, output_path)
        except UploadProblemException as e:
            entry = file_err(unicodify(e), dataset)
        metadata.append(entry)
    __write_job_metadata(metadata)
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
341
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
342
0641ea2f75b1 "planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff changeset
# Run the upload only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    __main__()