annotate env/lib/python3.7/site-packages/cwltool/provenance.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1 """Stores Research Object including provenance."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
2 from __future__ import absolute_import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
3
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
4 import copy
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
5 import datetime
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
6 import hashlib
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
7 import logging
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
8 import os
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
9 import re
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
10 import shutil
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
11 import tempfile
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
12 import uuid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
13 from collections import OrderedDict
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
14 from getpass import getuser
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
15 from io import BytesIO, FileIO, TextIOWrapper, open
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
16 from socket import getfqdn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
17 from typing import (IO, Any, Callable, Dict, List, Generator,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
18 MutableMapping, Optional, Set, Tuple, Union, cast)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
19 from types import ModuleType
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
20
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
21 import prov.model as provM
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
22 import six
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
23 from prov.identifier import Identifier, Namespace
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
24 from prov.model import (PROV, ProvActivity, # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
25 ProvDocument, ProvEntity)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
26 from pathlib2 import Path, PurePosixPath, PurePath
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
27 from ruamel import yaml
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
28 from schema_salad.sourceline import SourceLine
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
29 from six.moves import urllib
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
30 from typing_extensions import (TYPE_CHECKING, # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
31 Text)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
32 # move to a regular typing import when Python 3.3-3.6 is no longer supported
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
33
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
34 from .context import RuntimeContext # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
35 from .errors import WorkflowException
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
36 from .loghandler import _logger
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
37 from .pathmapper import get_listing
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
38 from .process import Process, shortname # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
39 from .stdfsaccess import StdFsAccess # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
40 from .utils import json_dumps, versionstring, onWindows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
41
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
42
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
43 # imports needed for retrieving user data
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
44 if onWindows():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
45 import ctypes # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
46 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
47 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
48 import pwd # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
49 except ImportError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
50 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
51
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
52 if TYPE_CHECKING:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
53 from .command_line_tool import CommandLineTool, ExpressionTool # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
54 from .workflow import Workflow # pylint: disable=unused-import
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
55
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
56 if six.PY2:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
57 class PermissionError(OSError): # pylint: disable=redefined-builtin
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
58 """Needed for Python2."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
59
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
60 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
61 __citation__ = "https://doi.org/10.5281/zenodo.1208477"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
62
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
63 # NOTE: Semantic versioning of the CWLProv Research Object
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
64 # **and** the cwlprov files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
65 #
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
66 # Rough guide (major.minor.patch):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
67 # 1. Bump major number if removing/"breaking" resources or PROV statements
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
68 # 2. Bump minor number if adding resources or PROV statements
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
69 # 3. Bump patch number for non-breaking non-adding changes,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
70 # e.g. fixing broken relative paths
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
71 CWLPROV_VERSION = "https://w3id.org/cwl/prov/0.6.0"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
72
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
73 # Research Object folders
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
74 METADATA = "metadata"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
75 DATA = "data"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
76 WORKFLOW = "workflow"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
77 SNAPSHOT = "snapshot"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
78 # sub-folders
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
79 MAIN = os.path.join(WORKFLOW, "main")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
80 PROVENANCE = os.path.join(METADATA, "provenance")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
81 LOGS = os.path.join(METADATA, "logs")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
82 WFDESC = Namespace("wfdesc", 'http://purl.org/wf4ever/wfdesc#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
83 WFPROV = Namespace("wfprov", 'http://purl.org/wf4ever/wfprov#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
84 WF4EVER = Namespace("wf4ever", 'http://purl.org/wf4ever/wf4ever#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
85 RO = Namespace("ro", 'http://purl.org/wf4ever/ro#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
86 ORE = Namespace("ore", 'http://www.openarchives.org/ore/terms/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
87 FOAF = Namespace("foaf", 'http://xmlns.com/foaf/0.1/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
88 SCHEMA = Namespace("schema", 'http://schema.org/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
89 CWLPROV = Namespace('cwlprov', 'https://w3id.org/cwl/prov#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
90 ORCID = Namespace("orcid", "https://orcid.org/")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
91 UUID = Namespace("id", "urn:uuid:")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
92
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
93 # BagIt and YAML always use UTF-8
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
94 ENCODING = "UTF-8"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
95 TEXT_PLAIN = 'text/plain; charset="%s"' % ENCODING
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
96
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
97 # sha1, compatible with the File type's "checksum" field
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
98 # e.g. "checksum" = "sha1$47a013e660d408619d894b20806b1d5086aab03b"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
99 # See ./cwltool/schemas/v1.0/Process.yml
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
100 Hasher = hashlib.sha1
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
101 SHA1 = "sha1"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
102 SHA256 = "sha256"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
103 SHA512 = "sha512"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
104
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
105 # TODO: Better identifiers for user, at least
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
106 # these should be preserved in ~/.config/cwl for every execution
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
107 # on this host
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
108 USER_UUID = uuid.uuid4().urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
109 ACCOUNT_UUID = uuid.uuid4().urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
110
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
111
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
112 def _posix_path(local_path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
113 # type: (Text) -> Text
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
114 return str(PurePosixPath(Path(local_path)))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
115
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
116
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
117 def _local_path(posix_path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
118 # type: (Text) -> Text
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
119 return str(Path(posix_path))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
120
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
121
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
122 def _whoami():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
123 # type: () -> Tuple[Text,Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
124 """Return the current operating system account as (username, fullname)."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
125 username = getuser()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
126 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
127 if onWindows():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
128 get_user_name = ctypes.windll.secur32.GetUserNameExW # type: ignore
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
129 size = ctypes.pointer(ctypes.c_ulong(0))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
130 get_user_name(3, None, size)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
131
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
132 name_buffer = ctypes.create_unicode_buffer(size.contents.value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
133 get_user_name(3, name_buffer, size)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
134 fullname = str(name_buffer.value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
135 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
136 fullname = pwd.getpwuid(os.getuid())[4].split(',')[0]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
137 except (KeyError, IndexError):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
138 fullname = username
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
139
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
140 return (username, fullname)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
141
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
142
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
143 class WritableBagFile(FileIO):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
144 """Writes files in research object."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
145
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
146 def __init__(self, research_object, rel_path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
147 # type: (ResearchObject, Text) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
148 """Initialize an ROBagIt."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
149 self.research_object = research_object
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
150 if Path(rel_path).is_absolute():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
151 raise ValueError("rel_path must be relative: %s" % rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
152 self.rel_path = rel_path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
153 self.hashes = {SHA1: hashlib.sha1(), # nosec
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
154 SHA256: hashlib.sha256(),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
155 SHA512: hashlib.sha512()}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
156 # Open file in Research Object folder
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
157 path = os.path.abspath(os.path.join(research_object.folder, _local_path(rel_path)))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
158 if not path.startswith(os.path.abspath(research_object.folder)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
159 raise ValueError("Path is outside Research Object: %s" % path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
160 super(WritableBagFile, self).__init__(str(path), mode="w")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
161
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
162
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
163 def write(self, b):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
164 # type: (Union[bytes, Text]) -> int
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
165 if isinstance(b, bytes):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
166 real_b = b
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
167 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
168 real_b = b.encode('utf-8')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
169 total = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
170 length = len(real_b)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
171 while total < length:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
172 ret = super(WritableBagFile, self).write(real_b)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
173 if ret:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
174 total += ret
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
175 for _ in self.hashes.values():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
176 _.update(real_b)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
177 return total
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
178
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
179 def close(self): # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
180 # FIXME: Convert below block to a ResearchObject method?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
181 if self.rel_path.startswith("data/"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
182 self.research_object.bagged_size[self.rel_path] = self.tell()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
183 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
184 self.research_object.tagfiles.add(self.rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
185
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
186 super(WritableBagFile, self).close()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
187 # { "sha1": "f572d396fae9206628714fb2ce00f72e94f2258f" }
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
188 checksums = {}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
189 for name in self.hashes:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
190 checksums[name] = self.hashes[name].hexdigest().lower()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
191 self.research_object.add_to_manifest(self.rel_path, checksums)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
192
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
193 # To simplify our hash calculation we won't support
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
194 # seeking, reading or truncating, as we can't do
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
195 # similar seeks in the current hash.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
196 # TODO: Support these? At the expense of invalidating
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
197 # the current hash, then having to recalculate at close()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
198 def seekable(self): # type: () -> bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
199 return False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
200
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
201 def readable(self): # type: () -> bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
202 return False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
203
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
204 def truncate(self, size=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
205 # type: (Optional[int]) -> int
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
206 # FIXME: This breaks contract IOBase,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
207 # as it means we would have to recalculate the hash
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
208 if size is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
209 raise IOError("WritableBagFile can't truncate")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
210 return self.tell()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
211
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
212
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
213 def _check_mod_11_2(numeric_string):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
214 # type: (Text) -> bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
215 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
216 Validate numeric_string for its MOD-11-2 checksum.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
217
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
218 Any "-" in the numeric_string are ignored.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
219
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
220 The last digit of numeric_string is assumed to be the checksum, 0-9 or X.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
221
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
222 See ISO/IEC 7064:2003 and
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
223 https://support.orcid.org/knowledgebase/articles/116780-structure-of-the-orcid-identifier
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
224 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
225 # Strip -
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
226 nums = numeric_string.replace("-", "")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
227 total = 0
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
228 # skip last (check)digit
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
229 for num in nums[:-1]:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
230 digit = int(num)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
231 total = (total+digit)*2
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
232 remainder = total % 11
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
233 result = (12-remainder) % 11
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
234 if result == 10:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
235 checkdigit = "X"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
236 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
237 checkdigit = str(result)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
238 # Compare against last digit or X
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
239 return nums[-1].upper() == checkdigit
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
240
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
241
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
242 def _valid_orcid(orcid): # type: (Optional[Text]) -> Text
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
243 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
244 Ensure orcid is a valid ORCID identifier.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
245
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
246 The string must be equivalent to one of these forms:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
247
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
248 0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
249 orcid.org/0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
250 http://orcid.org/0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
251 https://orcid.org/0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
252
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
253 If the ORCID number or prefix is invalid, a ValueError is raised.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
254
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
255 The returned ORCID string is always in the form of:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
256 https://orcid.org/0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
257 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
258 if orcid is None or not orcid:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
259 raise ValueError(u'ORCID cannot be unspecified')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
260 # Liberal in what we consume, e.g. ORCID.org/0000-0002-1825-009x
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
261 orcid = orcid.lower()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
262 match = re.match(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
263 # Note: concatinated r"" r"" below so we can add comments to pattern
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
264
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
265 # Optional hostname, with or without protocol
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
266 r"(http://orcid\.org/|https://orcid\.org/|orcid\.org/)?"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
267 # alternative pattern, but probably messier
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
268 # r"^((https?://)?orcid.org/)?"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
269
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
270 # ORCID number is always 4x4 numerical digits,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
271 # but last digit (modulus 11 checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
272 # can also be X (but we made it lowercase above).
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
273 # e.g. 0000-0002-1825-0097
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
274 # or 0000-0002-1694-233x
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
275 r"(?P<orcid>(\d{4}-\d{4}-\d{4}-\d{3}[0-9x]))$",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
276 orcid)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
277
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
278 help_url = u"https://support.orcid.org/knowledgebase/articles/"\
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
279 "116780-structure-of-the-orcid-identifier"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
280 if not match:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
281 raise ValueError(u"Invalid ORCID: %s\n%s" % (orcid, help_url))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
282
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
283 # Conservative in what we produce:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
284 # a) Ensure any checksum digit is uppercase
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
285 orcid_num = match.group("orcid").upper()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
286 # b) ..and correct
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
287 if not _check_mod_11_2(orcid_num):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
288 raise ValueError(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
289 u"Invalid ORCID checksum: %s\n%s" % (orcid_num, help_url))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
290
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
291 # c) Re-add the official prefix https://orcid.org/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
292 return u"https://orcid.org/%s" % orcid_num
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
293
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
294
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
295 class ProvenanceProfile():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
296 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
297 Provenance profile.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
298
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
299 Populated as the workflow runs.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
300 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
301
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
302 def __init__(self,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
303 research_object, # type: ResearchObject
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
304 full_name, # type: str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
305 host_provenance, # type: bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
306 user_provenance, # type: bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
307 orcid, # type: str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
308 fsaccess, # type: StdFsAccess
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
309 run_uuid=None # type: Optional[uuid.UUID]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
310 ): # type: (...) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
311 """Initialize the provenance profile."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
312 self.fsaccess = fsaccess
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
313 self.orcid = orcid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
314 self.research_object = research_object
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
315 self.folder = self.research_object.folder
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
316 self.document = ProvDocument()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
317 self.host_provenance = host_provenance
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
318 self.user_provenance = user_provenance
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
319 self.engine_uuid = research_object.engine_uuid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
320 self.add_to_manifest = self.research_object.add_to_manifest
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
321 if self.orcid:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
322 _logger.debug(u"[provenance] Creator ORCID: %s", self.orcid)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
323 self.full_name = full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
324 if self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
325 _logger.debug(u"[provenance] Creator Full name: %s",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
326 self.full_name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
327 if run_uuid is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
328 run_uuid = uuid.uuid4()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
329 self.workflow_run_uuid = run_uuid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
330 self.workflow_run_uri = run_uuid.urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
331 self.generate_prov_doc()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
332
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
333 def __str__(self): # type: () -> str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
334 """Represent this Provenvance profile as a string."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
335 return "ProvenanceProfile <%s> in <%s>" % (
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
336 self.workflow_run_uri, self.research_object)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
337
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
338 def generate_prov_doc(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
339 # type: () -> Tuple[str, ProvDocument]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
340 """Add basic namespaces."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
341 def host_provenance(document):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
342 # type: (ProvDocument) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
343 """Record host provenance."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
344 document.add_namespace(CWLPROV)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
345 document.add_namespace(UUID)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
346 document.add_namespace(FOAF)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
347
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
348 hostname = getfqdn()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
349 # won't have a foaf:accountServiceHomepage for unix hosts, but
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
350 # we can at least provide hostname
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
351 document.agent(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
352 ACCOUNT_UUID, {provM.PROV_TYPE: FOAF["OnlineAccount"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
353 "prov:location": hostname,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
354 CWLPROV["hostname"]: hostname})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
355
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
356 self.cwltool_version = "cwltool %s" % versionstring().split()[-1]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
357 self.document.add_namespace(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
358 'wfprov', 'http://purl.org/wf4ever/wfprov#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
359 # document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
360 self.document.add_namespace(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
361 'wfdesc', 'http://purl.org/wf4ever/wfdesc#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
362 # TODO: Make this ontology. For now only has cwlprov:image
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
363 self.document.add_namespace('cwlprov', 'https://w3id.org/cwl/prov#')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
364 self.document.add_namespace('foaf', 'http://xmlns.com/foaf/0.1/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
365 self.document.add_namespace('schema', 'http://schema.org/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
366 self.document.add_namespace('orcid', 'https://orcid.org/')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
367 self.document.add_namespace('id', 'urn:uuid:')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
368 # NOTE: Internet draft expired 2004-03-04 (!)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
369 # https://tools.ietf.org/html/draft-thiemann-hash-urn-01
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
370 # TODO: Change to nih:sha-256; hashes
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
371 # https://tools.ietf.org/html/rfc6920#section-7
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
372 self.document.add_namespace('data', 'urn:hash::sha1:')
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
373 # Also needed for docker images
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
374 self.document.add_namespace(SHA256, "nih:sha-256;")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
375
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
376 # info only, won't really be used by prov as sub-resources use /
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
377 self.document.add_namespace(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
378 'researchobject', self.research_object.base_uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
379 # annotations
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
380 self.metadata_ns = self.document.add_namespace(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
381 'metadata', self.research_object.base_uri + METADATA + "/")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
382 # Pre-register provenance directory so we can refer to its files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
383 self.provenance_ns = self.document.add_namespace(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
384 'provenance', self.research_object.base_uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
385 + _posix_path(PROVENANCE) + "/")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
386 ro_identifier_workflow = self.research_object.base_uri \
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
387 + "workflow/packed.cwl#"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
388 self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
389 ro_identifier_input = self.research_object.base_uri \
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
390 + "workflow/primary-job.json#"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
391 self.document.add_namespace("input", ro_identifier_input)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
392
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
393 # More info about the account (e.g. username, fullname)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
394 # may or may not have been previously logged by user_provenance()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
395 # .. but we always know cwltool was launched (directly or indirectly)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
396 # by a user account, as cwltool is a command line tool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
397 account = self.document.agent(ACCOUNT_UUID)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
398 if self.orcid or self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
399 person = {provM.PROV_TYPE: PROV["Person"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
400 "prov:type": SCHEMA["Person"]}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
401 if self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
402 person["prov:label"] = self.full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
403 person["foaf:name"] = self.full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
404 person["schema:name"] = self.full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
405 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
406 # TODO: Look up name from ORCID API?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
407 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
408 agent = self.document.agent(self.orcid or uuid.uuid4().urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
409 person)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
410 self.document.actedOnBehalfOf(account, agent)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
411 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
412 if self.host_provenance:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
413 host_provenance(self.document)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
414 if self.user_provenance:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
415 self.research_object.user_provenance(self.document)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
416 # The execution of cwltool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
417 wfengine = self.document.agent(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
418 self.engine_uuid,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
419 {provM.PROV_TYPE: PROV["SoftwareAgent"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
420 "prov:type": WFPROV["WorkflowEngine"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
421 "prov:label": self.cwltool_version})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
422 # FIXME: This datetime will be a bit too delayed, we should
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
423 # capture when cwltool.py earliest started?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
424 self.document.wasStartedBy(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
425 wfengine, None, account, datetime.datetime.now())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
426 # define workflow run level activity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
427 self.document.activity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
428 self.workflow_run_uri, datetime.datetime.now(), None,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
429 {provM.PROV_TYPE: WFPROV["WorkflowRun"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
430 "prov:label": "Run of workflow/packed.cwl#main"})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
431 # association between SoftwareAgent and WorkflowRun
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
432 main_workflow = "wf:main"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
433 self.document.wasAssociatedWith(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
434 self.workflow_run_uri, self.engine_uuid, main_workflow)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
435 self.document.wasStartedBy(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
436 self.workflow_run_uri, None, self.engine_uuid,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
437 datetime.datetime.now())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
438 return (self.workflow_run_uri, self.document)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
439
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
440 def evaluate(self,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
441 process, # type: Process
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
442 job, # type: Any
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
443 job_order_object, # type: Dict[Text, Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
444 research_obj # type: ResearchObject
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
445 ): # type: (...) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
446 """Evaluate the nature of job."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
447 if not hasattr(process, "steps"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
448 # record provenance of independent commandline tool executions
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
449 self.prospective_prov(job)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
450 customised_job = copy_job_order(job, job_order_object)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
451 self.used_artefacts(customised_job, self.workflow_run_uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
452 research_obj.create_job(customised_job, job)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
453 elif hasattr(job, "workflow"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
454 # record provenance of workflow executions
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
455 self.prospective_prov(job)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
456 customised_job = copy_job_order(job, job_order_object)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
457 self.used_artefacts(customised_job, self.workflow_run_uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
458
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
459 def record_process_start(self, process, job, process_run_id=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
460 # type: (Process, Any, Optional[str]) -> Optional[str]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
461 if not hasattr(process, 'steps'):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
462 process_run_id = self.workflow_run_uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
463 elif not hasattr(job, 'workflow'):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
464 # commandline tool execution as part of workflow
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
465 name = str(job.name) if hasattr(job, 'name') else ''
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
466 process_name = urllib.parse.quote(name, safe=":/,#")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
467 process_run_id = self.start_process(process_name, datetime.datetime.now())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
468 return process_run_id
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
469
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
470 def start_process(self, process_name, when, process_run_id=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
471 # type: (Text, datetime.datetime, Optional[str]) -> str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
472 """Record the start of each Process."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
473 if process_run_id is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
474 process_run_id = uuid.uuid4().urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
475 prov_label = "Run of workflow/packed.cwl#main/" + process_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
476 self.document.activity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
477 process_run_id, None, None,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
478 {provM.PROV_TYPE: WFPROV["ProcessRun"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
479 provM.PROV_LABEL: prov_label})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
480 self.document.wasAssociatedWith(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
481 process_run_id, self.engine_uuid, str("wf:main/" + process_name))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
482 self.document.wasStartedBy(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
483 process_run_id, None, self.workflow_run_uri,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
484 when, None, None)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
485 return process_run_id
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
486
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
487 def record_process_end(self, process_name, process_run_id, outputs, when):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
488 # type: (Text, str, Any, datetime.datetime) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
489 self.generate_output_prov(outputs, process_run_id, process_name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
490 self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
491
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
492 def declare_file(self, value):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
493 # type: (MutableMapping[Text, Any]) -> Tuple[ProvEntity, ProvEntity, str]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
494 if value["class"] != "File":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
495 raise ValueError("Must have class:File: %s" % value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
496 # Need to determine file hash aka RO filename
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
497 entity = None # type: Optional[ProvEntity]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
498 checksum = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
499 if 'checksum' in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
500 csum = value['checksum']
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
501 (method, checksum) = csum.split("$", 1)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
502 if method == SHA1 and \
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
503 self.research_object.has_data_file(checksum):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
504 entity = self.document.entity("data:" + checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
505
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
506 if not entity and 'location' in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
507 location = str(value['location'])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
508 # If we made it here, we'll have to add it to the RO
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
509 with self.fsaccess.open(location, "rb") as fhandle:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
510 relative_path = self.research_object.add_data_file(fhandle)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
511 # FIXME: This naively relies on add_data_file setting hash as filename
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
512 checksum = PurePath(relative_path).name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
513 entity = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
514 "data:" + checksum, {provM.PROV_TYPE: WFPROV["Artifact"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
515 if "checksum" not in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
516 value["checksum"] = "%s$%s" % (SHA1, checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
517
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
518
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
519 if not entity and 'contents' in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
520 # Anonymous file, add content as string
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
521 entity, checksum = self.declare_string(value["contents"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
522
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
523 # By here one of them should have worked!
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
524 if not entity or not checksum:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
525 raise ValueError("class:File but missing checksum/location/content: %r" % value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
526
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
527
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
528 # Track filename and extension, this is generally useful only for
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
529 # secondaryFiles. Note that multiple uses of a file might thus record
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
530 # different names for the same entity, so we'll
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
531 # make/track a specialized entity by UUID
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
532 file_id = value.setdefault("@id", uuid.uuid4().urn)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
533 # A specialized entity that has just these names
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
534 file_entity = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
535 file_id, [(provM.PROV_TYPE, WFPROV["Artifact"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
536 (provM.PROV_TYPE, WF4EVER["File"])]) # type: ProvEntity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
537
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
538 if "basename" in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
539 file_entity.add_attributes({CWLPROV["basename"]: value["basename"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
540 if "nameroot" in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
541 file_entity.add_attributes({CWLPROV["nameroot"]: value["nameroot"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
542 if "nameext" in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
543 file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
544 self.document.specializationOf(file_entity, entity)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
545
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
546 # Check for secondaries
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
547 for sec in value.get("secondaryFiles", ()):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
548 # TODO: Record these in a specializationOf entity with UUID?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
549 if sec['class'] == "File":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
550 (sec_entity, _, _) = self.declare_file(sec)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
551 elif sec['class'] == "Directory":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
552 sec_entity = self.declare_directory(sec)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
553 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
554 raise ValueError("Got unexpected secondaryFiles value: {}".format(sec))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
555 # We don't know how/when/where the secondary file was generated,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
556 # but CWL convention is a kind of summary/index derived
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
557 # from the original file. As its generally in a different format
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
558 # then prov:Quotation is not appropriate.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
559 self.document.derivation(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
560 sec_entity, file_entity,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
561 other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
562
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
563 return file_entity, entity, checksum
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
564
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
565 def declare_directory(self, value): # type: (MutableMapping[Text, Any]) -> ProvEntity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
566 """Register any nested files/directories."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
567 # FIXME: Calculate a hash-like identifier for directory
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
568 # so we get same value if it's the same filenames/hashes
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
569 # in a different location.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
570 # For now, mint a new UUID to identify this directory, but
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
571 # attempt to keep it inside the value dictionary
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
572 dir_id = value.setdefault("@id", uuid.uuid4().urn)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
573
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
574 # New annotation file to keep the ORE Folder listing
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
575 ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
576 dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
577
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
578 coll = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
579 dir_id, [(provM.PROV_TYPE, WFPROV["Artifact"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
580 (provM.PROV_TYPE, PROV["Collection"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
581 (provM.PROV_TYPE, PROV["Dictionary"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
582 (provM.PROV_TYPE, RO["Folder"])])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
583 # ORE description of ro:Folder, saved separately
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
584 coll_b = dir_bundle.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
585 dir_id, [(provM.PROV_TYPE, RO["Folder"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
586 (provM.PROV_TYPE, ORE["Aggregation"])])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
587 self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
588
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
589 # dir_manifest = dir_bundle.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
590 # dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
591 # ORE["describes"]: coll_b.identifier})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
592
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
593 coll_attribs = [(ORE["isDescribedBy"], dir_bundle.identifier)]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
594 coll_b_attribs = [] # type: List[Tuple[Identifier, ProvEntity]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
595
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
596 # FIXME: .listing might not be populated yet - hopefully
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
597 # a later call to this method will sort that
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
598 is_empty = True
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
599
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
600 if "listing" not in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
601 get_listing(self.fsaccess, value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
602 for entry in value.get("listing", []):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
603 is_empty = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
604 # Declare child-artifacts
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
605 entity = self.declare_artefact(entry)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
606 self.document.membership(coll, entity)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
607 # Membership relation aka our ORE Proxy
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
608 m_id = uuid.uuid4().urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
609 m_entity = self.document.entity(m_id)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
610 m_b = dir_bundle.entity(m_id)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
611
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
612 # PROV-O style Dictionary
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
613 # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
614 # ..as prov.py do not currently allow PROV-N extensions
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
615 # like hadDictionaryMember(..)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
616 m_entity.add_asserted_type(PROV["KeyEntityPair"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
617
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
618 m_entity.add_attributes({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
619 PROV["pairKey"]: entry["basename"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
620 PROV["pairEntity"]: entity,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
621 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
622
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
623 # As well as a being a
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
624 # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
625 m_b.add_asserted_type(RO["FolderEntry"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
626 m_b.add_asserted_type(ORE["Proxy"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
627 m_b.add_attributes({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
628 RO["entryName"]: entry["basename"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
629 ORE["proxyIn"]: coll,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
630 ORE["proxyFor"]: entity,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
631
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
632 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
633 coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
634 coll_b_attribs.append((ORE["aggregates"], m_b))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
635
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
636 coll.add_attributes(coll_attribs)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
637 coll_b.add_attributes(coll_b_attribs)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
638
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
639 # Also Save ORE Folder as annotation metadata
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
640 ore_doc = ProvDocument()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
641 ore_doc.add_namespace(ORE)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
642 ore_doc.add_namespace(RO)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
643 ore_doc.add_namespace(UUID)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
644 ore_doc.add_bundle(dir_bundle)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
645 ore_doc = ore_doc.flattened()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
646 ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
647 with self.research_object.write_bag_file(ore_doc_path) as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
648 ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
649 self.research_object.add_annotation(dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
650
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
651 if is_empty:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
652 # Empty directory
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
653 coll.add_asserted_type(PROV["EmptyCollection"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
654 coll.add_asserted_type(PROV["EmptyDictionary"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
655 self.research_object.add_uri(coll.identifier.uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
656 return coll
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
657
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
658 def declare_string(self, value):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
659 # type: (Union[Text, str]) -> Tuple[ProvEntity,Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
660 """Save as string in UTF-8."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
661 byte_s = BytesIO(str(value).encode(ENCODING))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
662 data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
663 checksum = PurePosixPath(data_file).name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
664 # FIXME: Don't naively assume add_data_file uses hash in filename!
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
665 data_id = "data:%s" % PurePosixPath(data_file).stem
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
666 entity = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
667 data_id, {provM.PROV_TYPE: WFPROV["Artifact"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
668 provM.PROV_VALUE: str(value)}) # type: ProvEntity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
669 return entity, checksum
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
670
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
671 def declare_artefact(self, value):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
672 # type: (Any) -> ProvEntity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
673 """Create data artefact entities for all file objects."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
674 if value is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
675 # FIXME: If this can happen in CWL, we'll
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
676 # need a better way to represent this in PROV
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
677 return self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
678 CWLPROV["None"], {provM.PROV_LABEL: "None"})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
679
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
680 if isinstance(value, (bool, int, float)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
681 # Typically used in job documents for flags
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
682
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
683 # FIXME: Make consistent hash URIs for these
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
684 # that somehow include the type
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
685 # (so "1" != 1 != "1.0" != true)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
686 entity = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
687 uuid.uuid4().urn, {provM.PROV_VALUE: value})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
688 self.research_object.add_uri(entity.identifier.uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
689 return entity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
690
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
691 if isinstance(value, (Text, str)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
692 (entity, _) = self.declare_string(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
693 return entity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
694
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
695 if isinstance(value, bytes):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
696 # If we got here then we must be in Python 3
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
697 byte_s = BytesIO(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
698 data_file = self.research_object.add_data_file(byte_s)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
699 # FIXME: Don't naively assume add_data_file uses hash in filename!
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
700 data_id = "data:%s" % PurePosixPath(data_file).stem
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
701 return self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
702 data_id, {provM.PROV_TYPE: WFPROV["Artifact"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
703 provM.PROV_VALUE: str(value)})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
704
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
705 if isinstance(value, MutableMapping):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
706 if "@id" in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
707 # Already processed this value, but it might not be in this PROV
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
708 entities = self.document.get_record(value["@id"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
709 if entities:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
710 return entities[0]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
711 # else, unknown in PROV, re-add below as if it's fresh
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
712
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
713 # Base case - we found a File we need to update
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
714 if value.get("class") == "File":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
715 (entity, _, _) = self.declare_file(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
716 value["@id"] = entity.identifier.uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
717 return entity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
718
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
719 if value.get("class") == "Directory":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
720 entity = self.declare_directory(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
721 value["@id"] = entity.identifier.uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
722 return entity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
723 coll_id = value.setdefault("@id", uuid.uuid4().urn)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
724 # some other kind of dictionary?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
725 # TODO: also Save as JSON
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
726 coll = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
727 coll_id, [(provM.PROV_TYPE, WFPROV["Artifact"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
728 (provM.PROV_TYPE, PROV["Collection"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
729 (provM.PROV_TYPE, PROV["Dictionary"])])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
730
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
731 if value.get("class"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
732 _logger.warning("Unknown data class %s.", value["class"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
733 # FIXME: The class might be "http://example.com/somethingelse"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
734 coll.add_asserted_type(CWLPROV[value["class"]])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
735
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
736 # Let's iterate and recurse
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
737 coll_attribs = [] # type: List[Tuple[Identifier, ProvEntity]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
738 for (key, val) in value.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
739 v_ent = self.declare_artefact(val)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
740 self.document.membership(coll, v_ent)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
741 m_entity = self.document.entity(uuid.uuid4().urn)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
742 # Note: only support PROV-O style dictionary
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
743 # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
744 # as prov.py do not easily allow PROV-N extensions
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
745 m_entity.add_asserted_type(PROV["KeyEntityPair"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
746 m_entity.add_attributes({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
747 PROV["pairKey"]: str(key),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
748 PROV["pairEntity"]: v_ent
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
749 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
750 coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
751 coll.add_attributes(coll_attribs)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
752 self.research_object.add_uri(coll.identifier.uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
753 return coll
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
754
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
755 # some other kind of Collection?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
756 # TODO: also save as JSON
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
757 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
758 members = []
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
759 for each_input_obj in iter(value):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
760 # Recurse and register any nested objects
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
761 e = self.declare_artefact(each_input_obj)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
762 members.append(e)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
763
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
764 # If we reached this, then we were allowed to iterate
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
765 coll = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
766 uuid.uuid4().urn, [(provM.PROV_TYPE, WFPROV["Artifact"]),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
767 (provM.PROV_TYPE, PROV["Collection"])])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
768 if not members:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
769 coll.add_asserted_type(PROV["EmptyCollection"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
770 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
771 for member in members:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
772 # FIXME: This won't preserve order, for that
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
773 # we would need to use PROV.Dictionary
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
774 # with numeric keys
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
775 self.document.membership(coll, member)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
776 self.research_object.add_uri(coll.identifier.uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
777 # FIXME: list value does not support adding "@id"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
778 return coll
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
779 except TypeError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
780 _logger.warning("Unrecognized type %s of %r",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
781 type(value), value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
782 # Let's just fall back to Python repr()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
783 entity = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
784 uuid.uuid4().urn, {provM.PROV_LABEL: repr(value)})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
785 self.research_object.add_uri(entity.identifier.uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
786 return entity
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
787
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
788 def used_artefacts(self,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
789 job_order, # type: Union[Dict[Any, Any], List[Dict[Any, Any]]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
790 process_run_id, # type: str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
791 name=None # type: Optional[str]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
792 ): # type: (...) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
793 """Add used() for each data artefact."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
794 if isinstance(job_order, list):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
795 for entry in job_order:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
796 self.used_artefacts(entry, process_run_id, name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
797 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
798 # FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
799 base = "main"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
800 if name is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
801 base += "/" + name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
802 for key, value in job_order.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
803 prov_role = self.wf_ns["%s/%s" % (base, key)]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
804 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
805 entity = self.declare_artefact(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
806 self.document.used(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
807 process_run_id, entity, datetime.datetime.now(), None,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
808 {"prov:role": prov_role})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
809 except OSError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
810 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
811
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
812 def generate_output_prov(self,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
813 final_output, # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
814 process_run_id, # type: Optional[str]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
815 name # type: Optional[Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
816 ): # type: (...) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
817 """Call wasGeneratedBy() for each output,copy the files into the RO."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
818 if isinstance(final_output, list):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
819 for entry in final_output:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
820 self.generate_output_prov(entry, process_run_id, name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
821 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
822 # Timestamp should be created at the earliest
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
823 timestamp = datetime.datetime.now()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
824
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
825 # For each output, find/register the corresponding
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
826 # entity (UUID) and document it as generated in
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
827 # a role corresponding to the output
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
828 for output, value in final_output.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
829 entity = self.declare_artefact(value)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
830 if name is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
831 name = urllib.parse.quote(str(name), safe=":/,#")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
832 # FIXME: Probably not "main" in nested workflows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
833 role = self.wf_ns["main/%s/%s" % (name, output)]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
834 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
835 role = self.wf_ns["main/%s" % output]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
836
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
837 if not process_run_id:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
838 process_run_id = self.workflow_run_uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
839
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
840 self.document.wasGeneratedBy(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
841 entity, process_run_id, timestamp, None, {"prov:role": role})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
842
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
843 def prospective_prov(self, job):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
844 # type: (Any) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
845 """Create prospective prov recording as wfdesc prov:Plan."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
846 if not hasattr(job, "steps"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
847 # direct command line tool execution
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
848 self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
849 "wf:main", {provM.PROV_TYPE: WFDESC["Process"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
850 "prov:type": PROV["Plan"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
851 "prov:label":"Prospective provenance"})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
852 return
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
853
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
854 self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
855 "wf:main", {provM.PROV_TYPE: WFDESC["Workflow"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
856 "prov:type": PROV["Plan"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
857 "prov:label":"Prospective provenance"})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
858
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
859 for step in job.steps:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
860 stepnametemp = "wf:main/" + str(step.name)[5:]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
861 stepname = urllib.parse.quote(stepnametemp, safe=":/,#")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
862 step = self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
863 stepname, {provM.PROV_TYPE: WFDESC["Process"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
864 "prov:type": PROV["Plan"]})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
865 self.document.entity(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
866 "wf:main", {"wfdesc:hasSubProcess": step,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
867 "prov:label": "Prospective provenance"})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
868 # TODO: Declare roles/parameters as well
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
869
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
870 def activity_has_provenance(self, activity, prov_ids):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
871 # type: (str, List[Identifier]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
872 """Add http://www.w3.org/TR/prov-aq/ relations to nested PROV files."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
873 # NOTE: The below will only work if the corresponding metadata/provenance arcp URI
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
874 # is a pre-registered namespace in the PROV Document
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
875 attribs = [(PROV["has_provenance"], prov_id) for prov_id in prov_ids]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
876 self.document.activity(activity, other_attributes=attribs)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
877 # Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
878 # as prov:mentionOf() is only for entities, not activities
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
879 uris = [i.uri for i in prov_ids]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
880 self.research_object.add_annotation(activity, uris, PROV["has_provenance"].uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
881
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
882 def finalize_prov_profile(self, name):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
883 # type: (Optional[Text]) -> List[Identifier]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
884 """Transfer the provenance related files to the RO."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
885 # NOTE: Relative posix path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
886 if name is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
887 # master workflow, fixed filenames
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
888 filename = "primary.cwlprov"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
889 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
890 # ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
891 wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
892 # Note that the above could cause overlaps for similarly named
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
893 # workflows, but that's OK as we'll also include run uuid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
894 # which also covers thhe case of this step being run in
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
895 # multiple places or iterations
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
896 filename = "%s.%s.cwlprov" % (wf_name, self.workflow_run_uuid)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
897
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
898 basename = str(PurePosixPath(PROVENANCE)/filename)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
899
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
900 # TODO: Also support other profiles than CWLProv, e.g. ProvOne
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
901
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
902 # list of prov identifiers of provenance files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
903 prov_ids = []
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
904
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
905 # https://www.w3.org/TR/prov-xml/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
906 with self.research_object.write_bag_file(basename + ".xml") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
907 self.document.serialize(provenance_file, format="xml", indent=4)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
908 prov_ids.append(self.provenance_ns[filename + ".xml"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
909
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
910 # https://www.w3.org/TR/prov-n/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
911 with self.research_object.write_bag_file(basename + ".provn") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
912 self.document.serialize(provenance_file, format="provn", indent=2)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
913 prov_ids.append(self.provenance_ns[filename + ".provn"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
914
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
915 # https://www.w3.org/Submission/prov-json/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
916 with self.research_object.write_bag_file(basename + ".json") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
917 self.document.serialize(provenance_file, format="json", indent=2)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
918 prov_ids.append(self.provenance_ns[filename + ".json"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
919
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
920 # "rdf" aka https://www.w3.org/TR/prov-o/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
921 # which can be serialized to ttl/nt/jsonld (and more!)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
922
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
923 # https://www.w3.org/TR/turtle/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
924 with self.research_object.write_bag_file(basename + ".ttl") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
925 self.document.serialize(provenance_file, format="rdf", rdf_format="turtle")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
926 prov_ids.append(self.provenance_ns[filename + ".ttl"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
927
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
928 # https://www.w3.org/TR/n-triples/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
929 with self.research_object.write_bag_file(basename + ".nt") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
930 self.document.serialize(provenance_file, format="rdf", rdf_format="ntriples")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
931 prov_ids.append(self.provenance_ns[filename + ".nt"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
932
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
933 # https://www.w3.org/TR/json-ld/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
934 # TODO: Use a nice JSON-LD context
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
935 # see also https://eprints.soton.ac.uk/395985/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
936 # 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
937 with self.research_object.write_bag_file(basename + ".jsonld") as provenance_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
938 self.document.serialize(provenance_file, format="rdf", rdf_format="json-ld")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
939 prov_ids.append(self.provenance_ns[filename + ".jsonld"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
940
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
941 _logger.debug(u"[provenance] added provenance: %s", prov_ids)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
942 return prov_ids
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
943
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
944
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
945 class ResearchObject():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
946 """CWLProv Research Object."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
947
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
948 def __init__(self, fsaccess, temp_prefix_ro="tmp", orcid='', full_name=''):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
949 # type: (StdFsAccess, str, Text, Text) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
950 """Initialize the ResearchObject."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
951 self.temp_prefix = temp_prefix_ro
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
952 self.orcid = '' if not orcid else _valid_orcid(orcid)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
953 self.full_name = full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
954 tmp_dir, tmp_prefix = os.path.split(temp_prefix_ro)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
955 self.folder = os.path.abspath(tempfile.mkdtemp(prefix=tmp_prefix,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
956 dir=tmp_dir)) # type: Text
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
957 self.closed = False
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
958 # map of filename "data/de/alsdklkas": 12398123 bytes
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
959 self.bagged_size = {} # type: Dict[Text, int]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
960 self.tagfiles = set() # type: Set[Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
961 self._file_provenance = {} # type: Dict[Text, Dict[Text, Text]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
962 self._external_aggregates = [] # type: List[Dict[Text, Text]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
963 self.annotations = [] # type: List[Dict[Text, Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
964 self._content_types = {} # type: Dict[Text,str]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
965 self.fsaccess = fsaccess
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
966 # These should be replaced by generate_prov_doc when workflow/run IDs are known:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
967 self.engine_uuid = "urn:uuid:%s" % uuid.uuid4()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
968 self.ro_uuid = uuid.uuid4()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
969 self.base_uri = "arcp://uuid,%s/" % self.ro_uuid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
970 self.cwltool_version = "cwltool %s" % versionstring().split()[-1]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
971 ##
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
972 self.relativised_input_object = {} # type: Dict[Any, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
973
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
974 self._initialize()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
975 _logger.debug(u"[provenance] Temporary research object: %s",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
976 self.folder)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
977
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
978 def self_check(self): # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
979 """Raise ValueError if this RO is closed."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
980 if self.closed:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
981 raise ValueError(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
982 "This ResearchObject has already been closed and is not "
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
983 "available for futher manipulation.")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
984
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
985 def __str__(self): # type: () -> str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
986 """Represent this RO as a string."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
987 return "ResearchObject <{}> in <{}>".format(self.ro_uuid, self.folder)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
988
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
989 def _initialize(self): # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
990 for research_obj_folder in (METADATA, DATA, WORKFLOW, SNAPSHOT,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
991 PROVENANCE, LOGS):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
992 os.makedirs(os.path.join(self.folder, research_obj_folder))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
993 self._initialize_bagit()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
994
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
995 def _initialize_bagit(self): # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
996 """Write fixed bagit header."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
997 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
998 bagit = os.path.join(self.folder, "bagit.txt")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
999 # encoding: always UTF-8 (although ASCII would suffice here)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1000 # newline: ensure LF also on Windows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1001 with open(bagit, "w", encoding=ENCODING, newline='\n') as bag_it_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1002 # TODO: \n or \r\n ?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1003 bag_it_file.write(u"BagIt-Version: 0.97\n")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1004 bag_it_file.write(u"Tag-File-Character-Encoding: %s\n" % ENCODING)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1005
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1006 def open_log_file_for_activity(self, uuid_uri): # type: (Text) -> WritableBagFile
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1007 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1008 # Ensure valid UUID for safe filenames
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1009 activity_uuid = uuid.UUID(uuid_uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1010 if activity_uuid.urn == self.engine_uuid:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1011 # It's the engine aka cwltool!
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1012 name = "engine"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1013 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1014 name = "activity"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1015 p = os.path.join(LOGS, "{}.{}.txt".format(name, activity_uuid))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1016 _logger.debug("[provenance] Opening log file for %s: %s" % (name, p))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1017 self.add_annotation(activity_uuid.urn, [p], CWLPROV["log"].uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1018 return self.write_bag_file(p)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1019
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1020 def _finalize(self): # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1021 self._write_ro_manifest()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1022 self._write_bag_info()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1023
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1024 def user_provenance(self, document): # type: (ProvDocument) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1025 """Add the user provenance."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1026 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1027 (username, fullname) = _whoami()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1028
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1029 if not self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1030 self.full_name = fullname
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1031
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1032 document.add_namespace(UUID)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1033 document.add_namespace(ORCID)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1034 document.add_namespace(FOAF)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1035 account = document.agent(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1036 ACCOUNT_UUID, {provM.PROV_TYPE: FOAF["OnlineAccount"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1037 "prov:label": username,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1038 FOAF["accountName"]: username})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1039
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1040 user = document.agent(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1041 self.orcid or USER_UUID,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1042 {provM.PROV_TYPE: PROV["Person"],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1043 "prov:label": self.full_name,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1044 FOAF["name"]: self.full_name,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1045 FOAF["account"]: account})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1046 # cwltool may be started on the shell (directly by user),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1047 # by shell script (indirectly by user)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1048 # or from a different program
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1049 # (which again is launched by any of the above)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1050 #
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1051 # We can't tell in which way, but ultimately we're still
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1052 # acting in behalf of that user (even if we might
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1053 # get their name wrong!)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1054 document.actedOnBehalfOf(account, user)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1055
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1056 def write_bag_file(self, path, encoding=ENCODING):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1057 # type: (Text, Optional[str]) -> WritableBagFile
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1058 """Write the bag file into our research object."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1059 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1060 # For some reason below throws BlockingIOError
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1061 #fp = BufferedWriter(WritableBagFile(self, path))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1062 bag_file = WritableBagFile(self, path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1063 if encoding is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1064 # encoding: match Tag-File-Character-Encoding: UTF-8
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1065 # newline: ensure LF also on Windows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1066 return cast(WritableBagFile,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1067 TextIOWrapper(cast(IO[bytes], bag_file), encoding=encoding,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1068 newline="\n"))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1069 return bag_file
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1070
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1071 def add_tagfile(self, path, timestamp=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1072 # type: (Text, Optional[datetime.datetime]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1073 """Add tag files to our research object."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1074 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1075 checksums = {}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1076 # Read file to calculate its checksum
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1077 if os.path.isdir(path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1078 return
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1079 # FIXME: do the right thing for directories
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1080 with open(path, "rb") as tag_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1081 # FIXME: Should have more efficient open_tagfile() that
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1082 # does all checksums in one go while writing through,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1083 # adding checksums after closing.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1084 # Below probably OK for now as metadata files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1085 # are not too large..?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1086
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1087 checksums[SHA1] = checksum_copy(tag_file, hasher=hashlib.sha1)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1088
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1089 tag_file.seek(0)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1090 checksums[SHA256] = checksum_copy(tag_file, hasher=hashlib.sha256)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1091
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1092 tag_file.seek(0)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1093 checksums[SHA512] = checksum_copy(tag_file, hasher=hashlib.sha512)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1094
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1095 rel_path = _posix_path(os.path.relpath(path, self.folder))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1096 self.tagfiles.add(rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1097 self.add_to_manifest(rel_path, checksums)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1098 if timestamp is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1099 self._file_provenance[rel_path] = {"createdOn": timestamp.isoformat()}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1100
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1101 def _ro_aggregates(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1102 # type: () -> List[Dict[Text, Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1103 """Gather dictionary of files to be added to the manifest."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1104 def guess_mediatype(rel_path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1105 # type: (Text) -> Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1106 """Return the mediatypes."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1107 media_types = {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1108 # Adapted from
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1109 # https://w3id.org/bundle/2014-11-05/#media-types
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1110
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1111 "txt": TEXT_PLAIN,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1112 "ttl": 'text/turtle; charset="UTF-8"',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1113 "rdf": 'application/rdf+xml',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1114 "json": 'application/json',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1115 "jsonld": 'application/ld+json',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1116 "xml": 'application/xml',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1117 ##
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1118 "cwl": 'text/x+yaml; charset="UTF-8"',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1119 "provn": 'text/provenance-notation; charset="UTF-8"',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1120 "nt": 'application/n-triples',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1121 } # type: Dict[Text, Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1122 conforms_to = {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1123 "provn": 'http://www.w3.org/TR/2013/REC-prov-n-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1124 "cwl": 'https://w3id.org/cwl/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1125 } # type: Dict[Text, Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1126
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1127 prov_conforms_to = {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1128 "provn": 'http://www.w3.org/TR/2013/REC-prov-n-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1129 "rdf": 'http://www.w3.org/TR/2013/REC-prov-o-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1130 "ttl": 'http://www.w3.org/TR/2013/REC-prov-o-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1131 "nt": 'http://www.w3.org/TR/2013/REC-prov-o-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1132 "jsonld": 'http://www.w3.org/TR/2013/REC-prov-o-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1133 "xml": 'http://www.w3.org/TR/2013/NOTE-prov-xml-20130430/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1134 "json": 'http://www.w3.org/Submission/2013/SUBM-prov-json-20130424/',
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1135 } # type: Dict[Text, Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1136
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1137
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1138 extension = rel_path.rsplit(".", 1)[-1].lower() # type: Optional[Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1139 if extension == rel_path:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1140 # No ".", no extension
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1141 extension = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1142
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1143 local_aggregate = {} # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1144 if extension in media_types:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1145 local_aggregate["mediatype"] = media_types[extension]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1146
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1147 if extension in conforms_to:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1148 # TODO: Open CWL file to read its declared "cwlVersion", e.g.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1149 # cwlVersion = "v1.0"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1150 local_aggregate["conformsTo"] = conforms_to[extension]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1151
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1152 if (rel_path.startswith(_posix_path(PROVENANCE))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1153 and extension in prov_conforms_to):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1154 if ".cwlprov" in rel_path:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1155 # Our own!
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1156 local_aggregate["conformsTo"] = [prov_conforms_to[extension], CWLPROV_VERSION]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1157 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1158 # Some other PROV
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1159 # TODO: Recognize ProvOne etc.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1160 local_aggregate["conformsTo"] = prov_conforms_to[extension]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1161 return local_aggregate
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1162
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1163 aggregates = [] # type: List[Dict[Text, Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1164 for path in self.bagged_size.keys():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1165 aggregate_dict = {} # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1166
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1167 temp_path = PurePosixPath(path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1168 folder = temp_path.parent
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1169 filename = temp_path.name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1170
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1171 # NOTE: Here we end up aggregating the abstract
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1172 # data items by their sha1 hash, so that it matches
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1173 # the entity() in the prov files.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1174
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1175 # TODO: Change to nih:sha-256; hashes
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1176 # https://tools.ietf.org/html/rfc6920#section-7
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1177 aggregate_dict["uri"] = 'urn:hash::sha1:' + filename
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1178 aggregate_dict["bundledAs"] = {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1179 # The arcp URI is suitable ORE proxy; local to this Research Object.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1180 # (as long as we don't also aggregate it by relative path!)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1181 "uri": self.base_uri + path,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1182 # relate it to the data/ path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1183 "folder": "/%s/" % folder,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1184 "filename": filename,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1185 }
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1186 if path in self._file_provenance:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1187 # Made by workflow run, merge captured provenance
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1188 aggregate_dict["bundledAs"].update(self._file_provenance[path])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1189 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1190 # Probably made outside wf run, part of job object?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1191 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1192 if path in self._content_types:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1193 aggregate_dict["mediatype"] = self._content_types[path]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1194
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1195 aggregates.append(aggregate_dict)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1196
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1197 for path in self.tagfiles:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1198 if (not (path.startswith(METADATA) or path.startswith(WORKFLOW) or
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1199 path.startswith(SNAPSHOT))):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1200 # probably a bagit file
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1201 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1202 if path == PurePosixPath(METADATA)/"manifest.json":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1203 # Should not really be there yet! But anyway, we won't
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1204 # aggregate it.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1205 continue
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1206
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1207 rel_aggregates = {} # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1208 # These are local paths like metadata/provenance - but
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1209 # we need to relativize them for our current directory for
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1210 # as we are saved in metadata/manifest.json
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1211 uri = str(Path(os.pardir)/path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1212
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1213 rel_aggregates["uri"] = uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1214 rel_aggregates.update(guess_mediatype(path))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1215
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1216 if path in self._file_provenance:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1217 # Propagate file provenance (e.g. timestamp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1218 rel_aggregates.update(self._file_provenance[path])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1219 elif not path.startswith(SNAPSHOT):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1220 # make new timestamp?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1221 rel_aggregates.update(self._self_made())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1222 aggregates.append(rel_aggregates)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1223 aggregates.extend(self._external_aggregates)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1224 return aggregates
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1225
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1226 def add_uri(self, uri, timestamp=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1227 # type: (str, Optional[datetime.datetime]) -> Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1228 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1229 aggr = self._self_made(timestamp=timestamp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1230 aggr["uri"] = uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1231 self._external_aggregates.append(aggr)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1232 return aggr
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1233
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1234 def add_annotation(self, about, content, motivated_by="oa:describing"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1235 # type: (str, List[str], str) -> str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1236 """Cheap URI relativize for current directory and /."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1237 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1238 curr = self.base_uri + METADATA + "/"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1239 content = [c.replace(curr, "").replace(self.base_uri, "../")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1240 for c in content]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1241 uri = uuid.uuid4().urn
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1242 ann = {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1243 u"uri": uri,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1244 u"about": about,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1245 u"content": content,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1246 u"oa:motivatedBy": {"@id": motivated_by}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1247 }
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1248 self.annotations.append(ann)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1249 return uri
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1250
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1251 def _ro_annotations(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1252 # type: () -> List[Dict[Text, Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1253 annotations = [] # type: List[Dict[Text, Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1254 annotations.append({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1255 "uri": uuid.uuid4().urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1256 "about": self.ro_uuid.urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1257 "content": "/",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1258 # https://www.w3.org/TR/annotation-vocab/#named-individuals
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1259 "oa:motivatedBy": {"@id": "oa:describing"}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1260 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1261
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1262 # How was it run?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1263 # FIXME: Only primary*
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1264 prov_files = [str(PurePosixPath(p).relative_to(METADATA)) for p in self.tagfiles
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1265 if p.startswith(_posix_path(PROVENANCE))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1266 and "/primary." in p]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1267 annotations.append({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1268 "uri": uuid.uuid4().urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1269 "about": self.ro_uuid.urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1270 "content": prov_files,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1271 # Modulation of https://www.w3.org/TR/prov-aq/
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1272 "oa:motivatedBy": {"@id": "http://www.w3.org/ns/prov#has_provenance"}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1273 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1274
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1275 # Where is the main workflow?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1276 annotations.append({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1277 "uri": uuid.uuid4().urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1278 "about": str(PurePosixPath("..")/WORKFLOW/"packed.cwl"),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1279 "oa:motivatedBy": {"@id": "oa:highlighting"}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1280 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1281
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1282 annotations.append({
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1283 "uri": uuid.uuid4().urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1284 "about": self.ro_uuid.urn,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1285 "content": [str(PurePosixPath("..")/WORKFLOW/"packed.cwl"),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1286 str(PurePosixPath("..")/WORKFLOW/"primary-job.json")],
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1287 "oa:motivatedBy": {"@id": "oa:linking"}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1288 })
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1289 # Add user-added annotations at end
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1290 annotations.extend(self.annotations)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1291 return annotations
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1292
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1293 def _authored_by(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1294 # type: () -> Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1295 authored_by = {}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1296 if self.orcid:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1297 authored_by["orcid"] = self.orcid
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1298 if self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1299 authored_by["name"] = self.full_name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1300 if not self.orcid:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1301 authored_by["uri"] = USER_UUID
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1302
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1303 if authored_by:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1304 return {"authoredBy": authored_by}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1305 return {}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1306
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1307
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1308 def _write_ro_manifest(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1309 # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1310
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1311 # Does not have to be this order, but it's nice to be consistent
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1312 manifest = OrderedDict() # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1313 manifest["@context"] = [
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1314 {"@base": "%s%s/" % (self.base_uri, _posix_path(METADATA))},
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1315 "https://w3id.org/bundle/context"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1316 ]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1317 manifest["id"] = "/"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1318 manifest["conformsTo"] = CWLPROV_VERSION
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1319 filename = "manifest.json"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1320 manifest["manifest"] = filename
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1321 manifest.update(self._self_made())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1322 manifest.update(self._authored_by())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1323 manifest["aggregates"] = self._ro_aggregates()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1324 manifest["annotations"] = self._ro_annotations()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1325
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1326 json_manifest = json_dumps(manifest, indent=4, ensure_ascii=False)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1327 rel_path = str(PurePosixPath(METADATA)/filename)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1328 json_manifest += "\n"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1329 with self.write_bag_file(rel_path) as manifest_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1330 manifest_file.write(json_manifest)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1331
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1332 def _write_bag_info(self):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1333 # type: () -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1334
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1335 with self.write_bag_file("bag-info.txt") as info_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1336 info_file.write(u"Bag-Software-Agent: %s\n" % self.cwltool_version)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1337 # FIXME: require sha-512 of payload to comply with profile?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1338 # FIXME: Update profile
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1339 info_file.write(u"BagIt-Profile-Identifier: https://w3id.org/ro/bagit/profile\n")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1340 info_file.write(u"Bagging-Date: %s\n" % datetime.date.today().isoformat())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1341 info_file.write(u"External-Description: Research Object of CWL workflow run\n")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1342 if self.full_name:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1343 info_file.write(u"Contact-Name: %s\n" % self.full_name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1344
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1345 # NOTE: We can't use the urn:uuid:{UUID} of the workflow run (a prov:Activity)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1346 # as identifier for the RO/bagit (a prov:Entity). However the arcp base URI is good.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1347 info_file.write(u"External-Identifier: %s\n" % self.base_uri)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1348
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1349 # Calculate size of data/ (assuming no external fetch.txt files)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1350 total_size = sum(self.bagged_size.values())
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1351 num_files = len(self.bagged_size)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1352 info_file.write(u"Payload-Oxum: %d.%d\n" % (total_size, num_files))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1353 _logger.debug(u"[provenance] Generated bagit metadata: %s",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1354 self.folder)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1355
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1356 def generate_snapshot(self, prov_dep):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1357 # type: (MutableMapping[Text, Any]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1358 """Copy all of the CWL files to the snapshot/ directory."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1359 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1360 for key, value in prov_dep.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1361 if key == "location" and value.split("/")[-1]:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1362 filename = value.split("/")[-1]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1363 path = os.path.join(self.folder, SNAPSHOT, filename)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1364 filepath = ''
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1365 if "file://" in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1366 filepath = value[7:]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1367 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1368 filepath = value
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1369
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1370 # FIXME: What if destination path already exists?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1371 if os.path.exists(filepath):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1372 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1373 if os.path.isdir(filepath):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1374 shutil.copytree(filepath, path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1375 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1376 shutil.copy(filepath, path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1377 timestamp = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1378 self.add_tagfile(path, timestamp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1379 except PermissionError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1380 pass # FIXME: avoids duplicate snapshotting; need better solution
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1381 elif key in ("secondaryFiles", "listing"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1382 for files in value:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1383 if isinstance(files, MutableMapping):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1384 self.generate_snapshot(files)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1385 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1386 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1387
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1388 def packed_workflow(self, packed): # type: (Text) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1389 """Pack CWL description to generate re-runnable CWL object in RO."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1390 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1391 rel_path = str(PurePosixPath(WORKFLOW)/"packed.cwl")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1392 # Write as binary
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1393 with self.write_bag_file(rel_path, encoding=None) as write_pack:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1394 # YAML is always UTF8, but json.dumps gives us str in py2
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1395 write_pack.write(packed.encode(ENCODING))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1396 _logger.debug(u"[provenance] Added packed workflow: %s", rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1397
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1398 def has_data_file(self, sha1hash): # type: (str) -> bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1399 """Confirm the presence of the given file in the RO."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1400 folder = os.path.join(self.folder, DATA, sha1hash[0:2])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1401 hash_path = os.path.join(folder, sha1hash)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1402 return os.path.isfile(hash_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1403
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1404 def add_data_file(self, from_fp, timestamp=None, content_type=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1405 # type: (IO[Any], Optional[datetime.datetime], Optional[str]) -> Text
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1406 """Copy inputs to data/ folder."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1407 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1408 tmp_dir, tmp_prefix = os.path.split(self.temp_prefix)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1409 with tempfile.NamedTemporaryFile(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1410 prefix=tmp_prefix, dir=tmp_dir, delete=False) as tmp:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1411 checksum = checksum_copy(from_fp, tmp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1412
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1413 # Calculate hash-based file path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1414 folder = os.path.join(self.folder, DATA, checksum[0:2])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1415 path = os.path.join(folder, checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1416 # os.rename assumed safe, as our temp file should
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1417 # be in same file system as our temp folder
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1418 if not os.path.isdir(folder):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1419 os.makedirs(folder)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1420 os.rename(tmp.name, path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1421
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1422 # Relative posix path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1423 # (to avoid \ on Windows)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1424 rel_path = _posix_path(os.path.relpath(path, self.folder))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1425
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1426 # Register in bagit checksum
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1427 if Hasher == hashlib.sha1:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1428 self._add_to_bagit(rel_path, sha1=checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1429 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1430 _logger.warning(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1431 u"[provenance] Unknown hash method %s for bagit manifest",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1432 Hasher)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1433 # Inefficient, bagit support need to checksum again
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1434 self._add_to_bagit(rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1435 _logger.debug(u"[provenance] Added data file %s", path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1436 if timestamp is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1437 self._file_provenance[rel_path] = self._self_made(timestamp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1438 _logger.debug(u"[provenance] Relative path for data file %s", rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1439
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1440 if content_type is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1441 self._content_types[rel_path] = content_type
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1442 return rel_path
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1443
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1444 def _self_made(self, timestamp=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1445 # type: (Optional[datetime.datetime]) -> Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1446 if timestamp is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1447 timestamp = datetime.datetime.now()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1448 return {
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1449 "createdOn": timestamp.isoformat(),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1450 "createdBy": {"uri": self.engine_uuid,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1451 "name": self.cwltool_version}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1452 }
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1453
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1454 def add_to_manifest(self, rel_path, checksums):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1455 # type: (Text, Dict[str,str]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1456 """Add files to the research object manifest."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1457 self.self_check()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1458 if PurePosixPath(rel_path).is_absolute():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1459 raise ValueError("rel_path must be relative: %s" % rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1460
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1461 if os.path.commonprefix(["data/", rel_path]) == "data/":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1462 # payload file, go to manifest
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1463 manifest = "manifest"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1464 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1465 # metadata file, go to tag manifest
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1466 manifest = "tagmanifest"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1467
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1468 # Add checksums to corresponding manifest files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1469 for (method, hash_value) in checksums.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1470 # File not in manifest because we bailed out on
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1471 # existence in bagged_size above
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1472 manifestpath = os.path.join(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1473 self.folder, "%s-%s.txt" % (manifest, method.lower()))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1474 # encoding: match Tag-File-Character-Encoding: UTF-8
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1475 # newline: ensure LF also on Windows
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1476 with open(manifestpath, "a", encoding=ENCODING, newline='\n') \
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1477 as checksum_file:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1478 line = u"%s %s\n" % (hash_value, rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1479 _logger.debug(u"[provenance] Added to %s: %s", manifestpath, line)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1480 checksum_file.write(line)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1481
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1482
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1483 def _add_to_bagit(self, rel_path, **checksums):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1484 # type: (Text, Any) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1485 if PurePosixPath(rel_path).is_absolute():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1486 raise ValueError("rel_path must be relative: %s" % rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1487 local_path = os.path.join(self.folder, _local_path(rel_path))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1488 if not os.path.exists(local_path):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1489 raise IOError("File %s does not exist within RO: %s" % (rel_path, local_path))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1490
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1491 if rel_path in self.bagged_size:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1492 # Already added, assume checksum OK
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1493 return
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1494 self.bagged_size[rel_path] = os.path.getsize(local_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1495
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1496 if SHA1 not in checksums:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1497 # ensure we always have sha1
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1498 checksums = dict(checksums)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1499 with open(local_path, "rb") as file_path:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1500 # FIXME: Need sha-256 / sha-512 as well for Research Object BagIt profile?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1501 checksums[SHA1] = checksum_copy(file_path, hasher=hashlib.sha1)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1502
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1503 self.add_to_manifest(rel_path, checksums)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1504
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1505 def create_job(self,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1506 builder_job, # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1507 wf_job=None, # type: Optional[Callable[[Dict[Text, Text], Callable[[Any, Any], Any], RuntimeContext], Generator[Any, None, None]]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1508 is_output=False # type: bool
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1509 ): # type: (...) -> Dict[Text, Text]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1510 #TODO customise the file
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1511 """Generate the new job object with RO specific relative paths."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1512 copied = copy.deepcopy(builder_job)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1513 relativised_input_objecttemp = {} # type: Dict[Text, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1514 self._relativise_files(copied)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1515 def jdefault(o): # type: (Any) -> Dict[Any, Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1516 return dict(o)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1517 if is_output:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1518 rel_path = PurePosixPath(WORKFLOW)/"primary-output.json"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1519 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1520 rel_path = PurePosixPath(WORKFLOW)/"primary-job.json"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1521 j = json_dumps(copied, indent=4, ensure_ascii=False, default=jdefault)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1522 with self.write_bag_file(str(rel_path)) as file_path:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1523 file_path.write(j + u"\n")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1524 _logger.debug(u"[provenance] Generated customised job file: %s",
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1525 rel_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1526 # Generate dictionary with keys as workflow level input IDs and values
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1527 # as
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1528 # 1) for files the relativised location containing hash
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1529 # 2) for other attributes, the actual value.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1530 relativised_input_objecttemp = {}
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1531 for key, value in copied.items():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1532 if isinstance(value, MutableMapping):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1533 if value.get("class") in ("File", "Directory"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1534 relativised_input_objecttemp[key] = value
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1535 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1536 relativised_input_objecttemp[key] = value
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1537 self.relativised_input_object.update(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1538 {k: v for k, v in relativised_input_objecttemp.items() if v})
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1539 return self.relativised_input_object
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1540
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1541 def _relativise_files(self, structure):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1542 # type: (Dict[Any, Any]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1543 """Save any file objects into the RO and update the local paths."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1544 # Base case - we found a File we need to update
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1545 _logger.debug(u"[provenance] Relativising: %s", structure)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1546
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1547 if isinstance(structure, MutableMapping):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1548 if structure.get("class") == "File":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1549 relative_path = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1550 if "checksum" in structure:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1551 alg, checksum = structure["checksum"].split("$")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1552 if alg != SHA1:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1553 raise TypeError(
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1554 "Only SHA1 CWL checksums are currently supported: "
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1555 "{}".format(structure))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1556 if self.has_data_file(checksum):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1557 prefix = checksum[0:2]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1558 relative_path = PurePosixPath("data")/prefix/checksum
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1559
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1560 if not relative_path is not None and "location" in structure:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1561 # Register in RO; but why was this not picked
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1562 # up by used_artefacts?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1563 _logger.info("[provenance] Adding to RO %s", structure["location"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1564 with self.fsaccess.open(structure["location"], "rb") as fp:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1565 relative_path = self.add_data_file(fp)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1566 checksum = PurePosixPath(relative_path).name
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1567 structure["checksum"] = "%s$%s" % (SHA1, checksum)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1568 if relative_path is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1569 # RO-relative path as new location
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1570 structure["location"] = str(PurePosixPath("..")/relative_path)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1571 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1572 _logger.warning("Could not determine RO path for file %s", structure)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1573 if "path" in structure:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1574 del structure["path"]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1575
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1576 if structure.get("class") == "Directory":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1577 # TODO: Generate anonymoys Directory with a "listing"
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1578 # pointing to the hashed files
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1579 del structure["location"]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1580
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1581 for val in structure.values():
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1582 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1583 self._relativise_files(val)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1584 except OSError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1585 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1586 return
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1587
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1588 if isinstance(structure, (str, Text)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1589 # Just a string value, no need to iterate further
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1590 return
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1591 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1592 for obj in iter(structure):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1593 # Recurse and rewrite any nested File objects
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1594 self._relativise_files(obj)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1595 except TypeError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1596 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1597
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1598 def close(self, save_to=None):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1599 # type: (Optional[str]) -> None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1600 """Close the Research Object, optionally saving to specified folder.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1601
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1602 Closing will remove any temporary files used by this research object.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1603 After calling this method, this ResearchObject instance can no longer
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1604 be used, except for no-op calls to .close().
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1605
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1606 The 'saveTo' folder should not exist - if it does, it will be deleted.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1607
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1608 It is safe to call this function multiple times without the
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1609 'saveTo' argument, e.g. within a try..finally block to
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1610 ensure the temporary files of this Research Object are removed.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1611 """
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1612 if save_to is None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1613 if not self.closed:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1614 _logger.debug(u"[provenance] Deleting temporary %s", self.folder)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1615 shutil.rmtree(self.folder, ignore_errors=True)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1616 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1617 save_to = os.path.abspath(save_to)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1618 _logger.info(u"[provenance] Finalizing Research Object")
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1619 self._finalize() # write manifest etc.
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1620 # TODO: Write as archive (.zip or .tar) based on extension?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1621
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1622 if os.path.isdir(save_to):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1623 _logger.info(u"[provenance] Deleting existing %s", save_to)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1624 shutil.rmtree(save_to)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1625 shutil.move(self.folder, save_to)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1626 _logger.info(u"[provenance] Research Object saved to %s", save_to)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1627 self.folder = save_to
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1628 self.closed = True
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1629
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1630 def checksum_copy(src_file, # type: IO[Any]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1631 dst_file=None, # type: Optional[IO[Any]]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1632 hasher=Hasher, # type: Callable[[], hashlib._Hash]
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1633 buffersize=1024*1024 # type: int
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1634 ): # type: (...) -> str
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1635 """Compute checksums while copying a file."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1636 # TODO: Use hashlib.new(Hasher_str) instead?
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1637 checksum = hasher()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1638 contents = src_file.read(buffersize)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1639 if dst_file and hasattr(dst_file, "name") and hasattr(src_file, "name"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1640 temp_location = os.path.join(os.path.dirname(dst_file.name),
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1641 str(uuid.uuid4()))
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1642 try:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1643 os.rename(dst_file.name, temp_location)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1644 os.link(src_file.name, dst_file.name)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1645 dst_file = None
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1646 os.unlink(temp_location)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1647 except OSError:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1648 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1649 if os.path.exists(temp_location):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1650 os.rename(temp_location, dst_file.name) # type: ignore
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1651 while contents != b"":
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1652 if dst_file is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1653 dst_file.write(contents)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1654 checksum.update(contents)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1655 contents = src_file.read(buffersize)
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1656 if dst_file is not None:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1657 dst_file.flush()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1658 return checksum.hexdigest().lower()
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1659
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1660 def copy_job_order(job, job_order_object):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1661 # type: (Any, Any) -> Any
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1662 """Create copy of job object for provenance."""
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1663 if not hasattr(job, "tool"):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1664 # direct command line tool execution
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1665 return job_order_object
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1666 customised_job = {} # new job object for RO
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1667 for each, i in enumerate(job.tool["inputs"]):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1668 with SourceLine(job.tool["inputs"], each, WorkflowException,
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1669 _logger.isEnabledFor(logging.DEBUG)):
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1670 iid = shortname(i["id"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1671 if iid in job_order_object:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1672 customised_job[iid] = copy.deepcopy(job_order_object[iid])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1673 # add the input element in dictionary for provenance
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1674 elif "default" in i:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1675 customised_job[iid] = copy.deepcopy(i["default"])
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1676 # add the default elements in the dictionary for provenance
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1677 else:
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1678 pass
26e78fe6e8c4 "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
shellac
parents:
diff changeset
1679 return customised_job