Mercurial > repos > rhohensinner > galaxy_irods_interface
annotate irods_upload.py @ 4:84f685c067ad draft default tip
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
author | rhohensinner |
---|---|
date | Wed, 04 Aug 2021 12:01:55 +0000 |
parents | d2be2eb8350f |
children |
rev | line source |
---|---|
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
2 # Processes uploads from the user. |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
3 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
4 # WARNING: Changes in this tool (particularly as related to parsing) may need |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
5 # to be reflected in galaxy.web.controllers.tool_runner and galaxy.tools |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
6 from __future__ import print_function |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
7 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
8 import errno |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
9 import os |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
10 import shutil |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
11 import sys |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
12 from json import dump, load, loads |
4
84f685c067ad
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
3
diff
changeset
|
# Extend the module search path before the galaxy packages are imported.
# "python__path.txt" is read from the current working directory and is split
# on commas; the segment after the final comma is always discarded, so the
# file presumably ends with a trailing comma.
# NOTE(review): confirm the exact format against whatever writes this file.
with open("python__path.txt", "r") as pp:
    ppstr = pp.read()
temp = ppstr.split(",")[:-1]
for it in temp:
    sys.path.append(it)
84f685c067ad
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
3
diff
changeset
|
18 |
2
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
19 from galaxy.datatypes import sniff |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
20 from galaxy.datatypes.registry import Registry |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
21 from galaxy.datatypes.upload_util import handle_upload, UploadProblemException |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
22 from galaxy.util import ( |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
23 bunch, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
24 safe_makedirs, |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
25 unicodify |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
26 ) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
27 from galaxy.util.compression_utils import CompressedFile |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
28 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
29 assert sys.version_info[:2] >= (2, 7) |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
30 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
31 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
# Module-level cache for the configured file sources; filled on first use.
_file_sources = None


def get_file_sources():
    """Return the configured file sources, building them on the first call.

    If a ``file_sources.json`` file exists in the current working directory,
    its parsed JSON content is passed to ``ConfiguredFileSources.from_dict``.
    When the file is missing, or it contains JSON ``null``, an instance built
    from an empty list is used instead.  The result is stored in the
    module-level ``_file_sources`` so later calls return the same object.
    """
    global _file_sources
    if _file_sources is None:
        # Imported here rather than at module import time.
        from galaxy.files import ConfiguredFileSources
        file_sources = None
        if os.path.exists("file_sources.json"):
            with open("file_sources.json") as f:
                file_sources_as_dict = load(f)
            if file_sources_as_dict is not None:
                file_sources = ConfiguredFileSources.from_dict(file_sources_as_dict)
        if file_sources is None:
            # Keep the fallback instance: if it were discarded the cache would
            # stay None and callers would receive None.
            file_sources = ConfiguredFileSources.from_dict([])
        _file_sources = file_sources
    return _file_sources
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
50 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
51 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def file_err(msg, dataset):
    """Build the failure record for a dataset and clean up its uploaded file.

    ``dataset`` must expose ``type``, ``path`` and ``dataset_id`` attributes.
    Unless the dataset type is ``server_dir`` or ``path_paste``, the file at
    ``dataset.path`` is deleted on a best-effort basis; any error raised by
    the deletion is ignored.

    Returns a dict with ``type='dataset'``, ``ext='data'``, the dataset id,
    ``msg`` under the ``stderr`` key and ``failed=True``.
    """
    # never remove a server-side upload
    if dataset.type not in ('server_dir', 'path_paste'):
        try:
            os.remove(dataset.path)
        except Exception:
            # Deliberately best-effort: a failed cleanup must not hide the
            # original upload error being reported.
            pass
    return dict(type='dataset',
                ext='data',
                dataset_id=dataset.dataset_id,
                stderr=msg,
                failed=True)
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
64 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
65 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def safe_dict(d):
    """Recursively clone JSON structure with unicode dictionary keys.

    Dicts are rebuilt with every key passed through ``unicodify`` and every
    value cloned recursively; lists are rebuilt element by element; any other
    value is returned unchanged (not copied).
    """
    if isinstance(d, dict):
        return {unicodify(k): safe_dict(v) for k, v in d.items()}
    elif isinstance(d, list):
        return [safe_dict(x) for x in d]
    else:
        return d
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
74 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
75 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def parse_outputs(args):
    """Map dataset ids to their output locations.

    Each element of ``args`` is a string of the form
    ``<id>:<files_path>:<path>``.  Only the first two colons act as
    separators, so ``path`` may itself contain colons.

    Returns a dict keyed by the integer id whose values are
    ``(path, files_path)`` tuples.  Raises ``ValueError`` when an entry has
    fewer than three fields or its id is not an integer.
    """
    rval = {}
    for arg in args:
        # Named dataset_id (not id) to avoid shadowing the builtin.
        dataset_id, files_path, path = arg.split(':', 2)
        rval[int(dataset_id)] = (path, files_path)
    return rval
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
82 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
83 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_file(dataset, registry, output_path):
    """Process one uploaded dataset and place its content at ``output_path``.

    ``dataset`` is read both through ``.get(key, default)`` and through
    attributes (``type``, ``path``, ``name``, ``file_type``, ``dataset_id``,
    ``to_posix_lines``, ``space_to_tab``) -- presumably a galaxy Bunch built
    from the upload parameters; confirm against the caller.  ``registry`` is
    handed to ``handle_upload`` unchanged.

    Returns a dict describing the dataset (``type``, ``dataset_id``, ``ext``,
    ``stdout``, ``name``, ``line_count`` and, when present, ``uuid``).

    Raises ``UploadProblemException`` when ``link_data_only`` has an invalid
    value, ``file_type`` is missing, a URL cannot be fetched, the source file
    does not exist, or a link-only upload would need grooming.
    """
    ext = None
    # compression_type and line_count are never reassigned in this function.
    compression_type = None
    line_count = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException("Invalid setting '%s' for option link_data_only - upload request misconfigured" % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Based on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be switched off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        # Attribute access is used only to detect a missing file_type.
        dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        # For URL uploads dataset.path holds the URL until it is replaced by
        # the path returned from the download.
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, unicodify(e)))

    # Fail early if the source file is missing
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)

    # is_binary is returned by handle_upload but not used below.
    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Strip compression extension from name
    # (compression_type is always None here, so this branch currently never runs.)
    if compression_type and not getattr(datatype, 'compressed', False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        # datatype is guarded against None, matching the grooming check further down.
        if datatype and datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                # If the original file is to be purged and there is a converted_path tempfile,
                # the converted file becomes the output and the original is deleted.
                if purge_source and converted_path:
                    shutil.move(converted_path, output_path)
                    os.remove(dataset.path)
                else:
                    shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
194 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
195 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def add_composite_file(dataset, registry, output_path, files_path):
    """Stage the component files of a composite dataset and place its primary file.

    ``dataset`` is read through attributes (``file_type``, ``composite_files``,
    ``composite_file_paths``, ``primary_file``, ``dataset_id``).  ``registry``
    is only consulted when ``dataset.file_type`` is not None.  Component files
    are staged into ``files_path`` and the primary file is moved to
    ``output_path``.

    Returns the job-info dict for the dataset.  Raises UploadProblemException
    when a URL cannot be fetched, when a declared composite file has no entry
    in ``dataset.composite_file_paths``, or when a non-optional one is None.
    """
    # Find data type
    datatype = None
    if dataset.file_type is not None:
        datatype = registry.get_datatype_by_extension(dataset.file_type)

    def to_path(path_or_url):
        # Returns (path, is_url).  Anything containing '://' is treated as a
        # URL and fetched with sniff.stream_url_to_file; the name it returns
        # is used as the path.
        if not path_or_url.find('://') >= 0:  # todo fixme
            return path_or_url, False
        try:
            fetched_name = sniff.stream_url_to_file(path_or_url, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (path_or_url, unicodify(e)))
        return fetched_name, True

    def stage_file(name, composite_file_path, is_binary=False):
        # Resolve the component to a path, then either unpack it into
        # files_path or hand it to sniff.handle_composite_file.
        local_path, was_url = to_path(composite_file_path['path'])
        if was_url:
            dataset.path = local_path

        wants_decompress = composite_file_path.get('auto_decompress', True)
        if wants_decompress and not datatype.composite_type and CompressedFile.can_decompress(local_path):
            # It isn't an explicitly composite datatype, so these are just extra files to
            # attach as composite data. It'd be better if Galaxy was communicating this to
            # the tool a little more explicitly so we didn't need to dispatch on the
            # datatype and so we could attach arbitrary extra composite data to an existing
            # composite datatype if need be? Perhaps that would be a mistake though.
            CompressedFile(local_path).extract(files_path)
            return

        sniff.handle_composite_file(
            datatype,
            local_path,
            files_path,
            name,
            is_binary,
            output_adjacent_tmpdir(output_path),
            'data_id_%s_convert_' % dataset.dataset_id,
            composite_file_path,
        )

    if dataset.composite_files:
        # Pre-defined composite files from the datatype definition.
        safe_makedirs(files_path)
        for name, spec in dataset.composite_files.items():
            spec = bunch.Bunch(**spec)
            if spec.name not in dataset.composite_file_paths:
                raise UploadProblemException("Failed to find file_path %s in %s" % (spec.name, dataset.composite_file_paths))
            supplied = dataset.composite_file_paths[spec.name]
            if supplied is not None:
                stage_file(name, supplied, spec.is_binary)
            elif not spec.optional:
                raise UploadProblemException('A required composite data file was not provided (%s)' % name)
    elif dataset.composite_file_paths:
        # Ad-hoc user supplied composite files.
        safe_makedirs(files_path)
        for key, composite_file in dataset.composite_file_paths.items():
            stage_file(key, composite_file)  # TODO: replace these defaults

    # Move the dataset to its "real" path
    primary_file_path, _ = to_path(dataset.primary_file)
    shutil.move(primary_file_path, output_path)

    # Write the job info
    return {
        'type': 'dataset',
        'dataset_id': dataset.dataset_id,
        'stdout': 'uploaded %s file' % dataset.file_type,
    }
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
274 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
275 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_paramfile(path):
    """Load a new-style paramfile and return its list of dataset descriptions.

    The file at ``path`` is parsed with ``load`` as a single document.

    Raises AssertionError when the parsed document is not a list.  Parse
    errors raised by ``load`` propagate unchanged.  ``__main__`` catches
    ValueError and AssertionError to fall back to the old-style reader.
    """
    with open(path) as fh:
        obj = load(fh)
    # If there's a single dataset in an old-style paramfile it'll still parse, but it'll be a dict.
    # Raise explicitly instead of using ``assert`` so the check is not stripped under ``python -O``.
    if not isinstance(obj, list):
        raise AssertionError('paramfile %s does not contain a list of datasets' % path)
    return obj
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
282 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
283 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __read_old_paramfile(path):
    """Read an old-style paramfile: every line of ``path`` is parsed with ``loads``.

    Returns the parsed values as a list, in file order.
    """
    with open(path) as fh:
        return [loads(row) for row in fh]
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
290 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
291 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __write_job_metadata(metadata):
    """Write ``metadata`` to ``galaxy.json`` in the current working directory.

    The file is opened in write mode; each entry is serialized with ``dump``
    and followed by a newline, so the file holds one entry per line.
    """
    # TODO: make upload/set_metadata compatible with https://github.com/galaxyproject/galaxy/pull/4437
    with open('galaxy.json', 'w') as out:
        for record in metadata:
            dump(record, out)
            out.write('\n')
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
298 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
299 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def output_adjacent_tmpdir(output_path):
    """Return the directory part of ``output_path``.

    Temp files that will ultimately be moved to ``output_path`` can be created
    in that directory so that the later ``shutil.move`` works optimally.
    """
    parent_dir, _basename = os.path.split(output_path)
    return parent_dir
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
306 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
307 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
def __main__():
    """Command-line entry point: process every dataset listed in the paramfile.

    ``sys.argv`` is expected to be ``<root> <datatypes_conf> <json paramfile>
    <output spec> ...``.  Exits with status 1 when fewer than three arguments
    follow the program name, or when no output path can be looked up for a
    dataset.  One metadata entry is collected per dataset (the upload result,
    or the ``file_err`` entry when UploadProblemException is raised) and the
    collection is passed to ``__write_job_metadata``.
    """
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)

    root_dir, datatypes_conf, paramfile = sys.argv[1:4]
    output_paths = parse_outputs(sys.argv[4:])

    registry = Registry()
    registry.load_datatypes(root_dir=root_dir, config=datatypes_conf)

    try:
        raw_datasets = __read_paramfile(paramfile)
    except (ValueError, AssertionError):
        # Not a single parseable list: retry with the old-style reader.
        raw_datasets = __read_old_paramfile(paramfile)

    metadata = []
    for raw in raw_datasets:
        dataset = bunch.Bunch(**safe_dict(raw))
        try:
            paths = output_paths[int(dataset.dataset_id)]
            output_path = paths[0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                entry = add_composite_file(dataset, registry, output_path, paths[1])
            else:
                entry = add_file(dataset, registry, output_path)
        except UploadProblemException as e:
            # Record the failure for this dataset and keep going with the rest.
            entry = file_err(unicodify(e), dataset)
        metadata.append(entry)
    __write_job_metadata(metadata)
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
341 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
342 |
0641ea2f75b1
"planemo upload commit b2a00d9c24285fef0fb131d1832ecf4c337e5038-dirty"
rhohensinner
parents:
diff
changeset
|
# Run the upload only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    __main__()