Mercurial > repos > shellac > sam_consensus_v3
annotate env/lib/python3.9/site-packages/cwltool/provenance_profile.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
rev | line source |
---|---|
0
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
1 import copy |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
2 import datetime |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
3 import logging |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
4 import urllib |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
5 import uuid |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
6 from io import BytesIO |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
7 from pathlib import PurePath, PurePosixPath |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
8 from socket import getfqdn |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
9 from typing import List, MutableMapping, MutableSequence, Optional, Tuple, Union, cast |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
10 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
11 from prov.identifier import Identifier |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
12 from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
13 from schema_salad.sourceline import SourceLine |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
14 from typing_extensions import TYPE_CHECKING |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
15 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
16 from .errors import WorkflowException |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
17 from .job import CommandLineJob, JobBase |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
18 from .loghandler import _logger |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
19 from .process import Process, shortname |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
20 from .provenance_constants import ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
21 ACCOUNT_UUID, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
22 CWLPROV, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
23 ENCODING, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
24 FOAF, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
25 METADATA, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
26 ORE, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
27 PROVENANCE, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
28 RO, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
29 SCHEMA, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
30 SHA1, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
31 SHA256, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
32 TEXT_PLAIN, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
33 UUID, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
34 WF4EVER, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
35 WFDESC, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
36 WFPROV, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
37 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
38 from .stdfsaccess import StdFsAccess |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
39 from .utils import ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
40 CWLObjectType, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
41 CWLOutputType, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
42 JobsType, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
43 get_listing, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
44 posix_path, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
45 versionstring, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
46 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
47 from .workflow_job import WorkflowJob |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
48 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
49 if TYPE_CHECKING: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
50 from .provenance import ResearchObject |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
51 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
52 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
53 def copy_job_order( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
54 job: Union[Process, JobsType], job_order_object: CWLObjectType |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
55 ) -> CWLObjectType: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
56 """Create copy of job object for provenance.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
57 if not isinstance(job, WorkflowJob): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
58 # direct command line tool execution |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
59 return job_order_object |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
60 customised_job = {} # type: CWLObjectType |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
61 # new job object for RO |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
62 for each, i in enumerate(job.tool["inputs"]): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
63 with SourceLine( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
64 job.tool["inputs"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
65 each, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
66 WorkflowException, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
67 _logger.isEnabledFor(logging.DEBUG), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
68 ): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
69 iid = shortname(i["id"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
70 if iid in job_order_object: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
71 customised_job[iid] = copy.deepcopy(job_order_object[iid]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
72 # add the input element in dictionary for provenance |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
73 elif "default" in i: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
74 customised_job[iid] = copy.deepcopy(i["default"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
75 # add the default elements in the dictionary for provenance |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
76 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
77 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
78 return customised_job |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
79 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
80 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
81 class ProvenanceProfile: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
82 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
83 Provenance profile. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
84 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
85 Populated as the workflow runs. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
86 """ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
87 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
88 def __init__( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
89 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
90 research_object: "ResearchObject", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
91 full_name: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
92 host_provenance: bool, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
93 user_provenance: bool, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
94 orcid: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
95 fsaccess: StdFsAccess, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
96 run_uuid: Optional[uuid.UUID] = None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
97 ) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
98 """Initialize the provenance profile.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
99 self.fsaccess = fsaccess |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
100 self.orcid = orcid |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
101 self.research_object = research_object |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
102 self.folder = self.research_object.folder |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
103 self.document = ProvDocument() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
104 self.host_provenance = host_provenance |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
105 self.user_provenance = user_provenance |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
106 self.engine_uuid = research_object.engine_uuid # type: str |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
107 self.add_to_manifest = self.research_object.add_to_manifest |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
108 if self.orcid: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
109 _logger.debug("[provenance] Creator ORCID: %s", self.orcid) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
110 self.full_name = full_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
111 if self.full_name: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
112 _logger.debug("[provenance] Creator Full name: %s", self.full_name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
113 self.workflow_run_uuid = run_uuid or uuid.uuid4() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
114 self.workflow_run_uri = self.workflow_run_uuid.urn # type: str |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
115 self.generate_prov_doc() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
116 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
117 def __str__(self) -> str: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
118 """Represent this Provenvance profile as a string.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
119 return "ProvenanceProfile <{}> in <{}>".format( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
120 self.workflow_run_uri, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
121 self.research_object, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
122 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
123 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
124 def generate_prov_doc(self) -> Tuple[str, ProvDocument]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
125 """Add basic namespaces.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
126 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
127 def host_provenance(document: ProvDocument) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
128 """Record host provenance.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
129 document.add_namespace(CWLPROV) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
130 document.add_namespace(UUID) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
131 document.add_namespace(FOAF) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
132 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
133 hostname = getfqdn() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
134 # won't have a foaf:accountServiceHomepage for unix hosts, but |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
135 # we can at least provide hostname |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
136 document.agent( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
137 ACCOUNT_UUID, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
138 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
139 PROV_TYPE: FOAF["OnlineAccount"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
140 "prov:location": hostname, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
141 CWLPROV["hostname"]: hostname, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
142 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
143 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
144 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
145 self.cwltool_version = "cwltool %s" % versionstring().split()[-1] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
146 self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
147 # document.add_namespace('prov', 'http://www.w3.org/ns/prov#') |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
148 self.document.add_namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
149 # TODO: Make this ontology. For now only has cwlprov:image |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
150 self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
151 self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
152 self.document.add_namespace("schema", "http://schema.org/") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
153 self.document.add_namespace("orcid", "https://orcid.org/") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
154 self.document.add_namespace("id", "urn:uuid:") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
155 # NOTE: Internet draft expired 2004-03-04 (!) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
156 # https://tools.ietf.org/html/draft-thiemann-hash-urn-01 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
157 # TODO: Change to nih:sha-256; hashes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
158 # https://tools.ietf.org/html/rfc6920#section-7 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
159 self.document.add_namespace("data", "urn:hash::sha1:") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
160 # Also needed for docker images |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
161 self.document.add_namespace(SHA256, "nih:sha-256;") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
162 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
163 # info only, won't really be used by prov as sub-resources use / |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
164 self.document.add_namespace("researchobject", self.research_object.base_uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
165 # annotations |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
166 self.metadata_ns = self.document.add_namespace( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
167 "metadata", self.research_object.base_uri + METADATA + "/" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
168 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
169 # Pre-register provenance directory so we can refer to its files |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
170 self.provenance_ns = self.document.add_namespace( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
171 "provenance", self.research_object.base_uri + posix_path(PROVENANCE) + "/" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
172 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
173 ro_identifier_workflow = self.research_object.base_uri + "workflow/packed.cwl#" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
174 self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
175 ro_identifier_input = ( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
176 self.research_object.base_uri + "workflow/primary-job.json#" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
177 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
178 self.document.add_namespace("input", ro_identifier_input) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
179 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
180 # More info about the account (e.g. username, fullname) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
181 # may or may not have been previously logged by user_provenance() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
182 # .. but we always know cwltool was launched (directly or indirectly) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
183 # by a user account, as cwltool is a command line tool |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
184 account = self.document.agent(ACCOUNT_UUID) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
185 if self.orcid or self.full_name: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
186 person = {PROV_TYPE: PROV["Person"], "prov:type": SCHEMA["Person"]} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
187 if self.full_name: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
188 person["prov:label"] = self.full_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
189 person["foaf:name"] = self.full_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
190 person["schema:name"] = self.full_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
191 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
192 # TODO: Look up name from ORCID API? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
193 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
194 agent = self.document.agent(self.orcid or uuid.uuid4().urn, person) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
195 self.document.actedOnBehalfOf(account, agent) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
196 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
197 if self.host_provenance: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
198 host_provenance(self.document) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
199 if self.user_provenance: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
200 self.research_object.user_provenance(self.document) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
201 # The execution of cwltool |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
202 wfengine = self.document.agent( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
203 self.engine_uuid, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
204 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
205 PROV_TYPE: PROV["SoftwareAgent"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
206 "prov:type": WFPROV["WorkflowEngine"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
207 "prov:label": self.cwltool_version, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
208 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
209 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
210 # FIXME: This datetime will be a bit too delayed, we should |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
211 # capture when cwltool.py earliest started? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
212 self.document.wasStartedBy(wfengine, None, account, datetime.datetime.now()) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
213 # define workflow run level activity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
214 self.document.activity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
215 self.workflow_run_uri, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
216 datetime.datetime.now(), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
217 None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
218 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
219 PROV_TYPE: WFPROV["WorkflowRun"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
220 "prov:label": "Run of workflow/packed.cwl#main", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
221 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
222 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
223 # association between SoftwareAgent and WorkflowRun |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
224 main_workflow = "wf:main" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
225 self.document.wasAssociatedWith( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
226 self.workflow_run_uri, self.engine_uuid, main_workflow |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
227 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
228 self.document.wasStartedBy( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
229 self.workflow_run_uri, None, self.engine_uuid, datetime.datetime.now() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
230 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
231 return (self.workflow_run_uri, self.document) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
232 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
233 def evaluate( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
234 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
235 process: Process, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
236 job: JobsType, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
237 job_order_object: CWLObjectType, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
238 research_obj: "ResearchObject", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
239 ) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
240 """Evaluate the nature of job.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
241 if not hasattr(process, "steps"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
242 # record provenance of independent commandline tool executions |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
243 self.prospective_prov(job) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
244 customised_job = copy_job_order(job, job_order_object) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
245 self.used_artefacts(customised_job, self.workflow_run_uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
246 research_obj.create_job(customised_job) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
247 elif hasattr(job, "workflow"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
248 # record provenance of workflow executions |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
249 self.prospective_prov(job) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
250 customised_job = copy_job_order(job, job_order_object) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
251 self.used_artefacts(customised_job, self.workflow_run_uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
252 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
253 def record_process_start( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
254 self, process: Process, job: JobsType, process_run_id: Optional[str] = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
255 ) -> Optional[str]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
256 if not hasattr(process, "steps"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
257 process_run_id = self.workflow_run_uri |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
258 elif not hasattr(job, "workflow"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
259 # commandline tool execution as part of workflow |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
260 name = "" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
261 if isinstance(job, (CommandLineJob, JobBase, WorkflowJob)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
262 name = job.name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
263 process_name = urllib.parse.quote(name, safe=":/,#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
264 process_run_id = self.start_process(process_name, datetime.datetime.now()) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
265 return process_run_id |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
266 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
267 def start_process( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
268 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
269 process_name: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
270 when: datetime.datetime, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
271 process_run_id: Optional[str] = None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
272 ) -> str: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
273 """Record the start of each Process.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
274 if process_run_id is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
275 process_run_id = uuid.uuid4().urn |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
276 prov_label = "Run of workflow/packed.cwl#main/" + process_name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
277 self.document.activity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
278 process_run_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
279 None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
280 None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
281 {PROV_TYPE: WFPROV["ProcessRun"], PROV_LABEL: prov_label}, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
282 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
283 self.document.wasAssociatedWith( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
284 process_run_id, self.engine_uuid, str("wf:main/" + process_name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
285 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
286 self.document.wasStartedBy( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
287 process_run_id, None, self.workflow_run_uri, when, None, None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
288 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
289 return process_run_id |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
290 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
291 def record_process_end( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
292 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
293 process_name: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
294 process_run_id: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
295 outputs: Union[CWLObjectType, MutableSequence[CWLObjectType], None], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
296 when: datetime.datetime, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
297 ) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
298 self.generate_output_prov(outputs, process_run_id, process_name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
299 self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
300 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
301 def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
302 if value["class"] != "File": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
303 raise ValueError("Must have class:File: %s" % value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
304 # Need to determine file hash aka RO filename |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
305 entity = None # type: Optional[ProvEntity] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
306 checksum = None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
307 if "checksum" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
308 csum = cast(str, value["checksum"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
309 (method, checksum) = csum.split("$", 1) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
310 if method == SHA1 and self.research_object.has_data_file(checksum): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
311 entity = self.document.entity("data:" + checksum) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
312 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
313 if not entity and "location" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
314 location = str(value["location"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
315 # If we made it here, we'll have to add it to the RO |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
316 with self.fsaccess.open(location, "rb") as fhandle: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
317 relative_path = self.research_object.add_data_file(fhandle) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
318 # FIXME: This naively relies on add_data_file setting hash as filename |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
319 checksum = PurePath(relative_path).name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
320 entity = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
321 "data:" + checksum, {PROV_TYPE: WFPROV["Artifact"]} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
322 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
323 if "checksum" not in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
324 value["checksum"] = f"{SHA1}${checksum}" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
325 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
326 if not entity and "contents" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
327 # Anonymous file, add content as string |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
328 entity, checksum = self.declare_string(cast(str, value["contents"])) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
329 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
330 # By here one of them should have worked! |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
331 if not entity or not checksum: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
332 raise ValueError( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
333 "class:File but missing checksum/location/content: %r" % value |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
334 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
335 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
336 # Track filename and extension, this is generally useful only for |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
337 # secondaryFiles. Note that multiple uses of a file might thus record |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
338 # different names for the same entity, so we'll |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
339 # make/track a specialized entity by UUID |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
340 file_id = value.setdefault("@id", uuid.uuid4().urn) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
341 # A specialized entity that has just these names |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
342 file_entity = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
343 file_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
344 [(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
345 ) # type: ProvEntity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
346 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
347 if "basename" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
348 file_entity.add_attributes({CWLPROV["basename"]: value["basename"]}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
349 if "nameroot" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
350 file_entity.add_attributes({CWLPROV["nameroot"]: value["nameroot"]}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
351 if "nameext" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
352 file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
353 self.document.specializationOf(file_entity, entity) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
354 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
355 # Check for secondaries |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
356 for sec in cast( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
357 MutableSequence[CWLObjectType], value.get("secondaryFiles", []) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
358 ): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
359 # TODO: Record these in a specializationOf entity with UUID? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
360 if sec["class"] == "File": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
361 (sec_entity, _, _) = self.declare_file(sec) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
362 elif sec["class"] == "Directory": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
363 sec_entity = self.declare_directory(sec) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
364 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
365 raise ValueError(f"Got unexpected secondaryFiles value: {sec}") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
366 # We don't know how/when/where the secondary file was generated, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
367 # but CWL convention is a kind of summary/index derived |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
368 # from the original file. As its generally in a different format |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
369 # then prov:Quotation is not appropriate. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
370 self.document.derivation( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
371 sec_entity, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
372 file_entity, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
373 other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]}, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
374 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
375 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
376 return file_entity, entity, checksum |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
377 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
378 def declare_directory(self, value: CWLObjectType) -> ProvEntity: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
379 """Register any nested files/directories.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
380 # FIXME: Calculate a hash-like identifier for directory |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
381 # so we get same value if it's the same filenames/hashes |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
382 # in a different location. |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
383 # For now, mint a new UUID to identify this directory, but |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
384 # attempt to keep it inside the value dictionary |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
385 dir_id = cast(str, value.setdefault("@id", uuid.uuid4().urn)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
386 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
387 # New annotation file to keep the ORE Folder listing |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
388 ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
389 dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
390 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
391 coll = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
392 dir_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
393 [ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
394 (PROV_TYPE, WFPROV["Artifact"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
395 (PROV_TYPE, PROV["Collection"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
396 (PROV_TYPE, PROV["Dictionary"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
397 (PROV_TYPE, RO["Folder"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
398 ], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
399 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
400 # ORE description of ro:Folder, saved separately |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
401 coll_b = dir_bundle.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
402 dir_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
403 [(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
404 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
405 self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
406 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
407 # dir_manifest = dir_bundle.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
408 # dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
409 # ORE["describes"]: coll_b.identifier}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
410 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
411 coll_attribs = [(ORE["isDescribedBy"], dir_bundle.identifier)] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
412 coll_b_attribs = [] # type: List[Tuple[Identifier, ProvEntity]] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
413 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
414 # FIXME: .listing might not be populated yet - hopefully |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
415 # a later call to this method will sort that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
416 is_empty = True |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
417 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
418 if "listing" not in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
419 get_listing(self.fsaccess, value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
420 for entry in cast(MutableSequence[CWLObjectType], value.get("listing", [])): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
421 is_empty = False |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
422 # Declare child-artifacts |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
423 entity = self.declare_artefact(entry) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
424 self.document.membership(coll, entity) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
425 # Membership relation aka our ORE Proxy |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
426 m_id = uuid.uuid4().urn |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
427 m_entity = self.document.entity(m_id) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
428 m_b = dir_bundle.entity(m_id) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
429 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
430 # PROV-O style Dictionary |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
431 # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
432 # ..as prov.py do not currently allow PROV-N extensions |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
433 # like hadDictionaryMember(..) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
434 m_entity.add_asserted_type(PROV["KeyEntityPair"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
435 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
436 m_entity.add_attributes( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
437 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
438 PROV["pairKey"]: entry["basename"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
439 PROV["pairEntity"]: entity, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
440 } |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
441 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
442 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
443 # As well as a being a |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
444 # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
445 m_b.add_asserted_type(RO["FolderEntry"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
446 m_b.add_asserted_type(ORE["Proxy"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
447 m_b.add_attributes( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
448 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
449 RO["entryName"]: entry["basename"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
450 ORE["proxyIn"]: coll, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
451 ORE["proxyFor"]: entity, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
452 } |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
453 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
454 coll_attribs.append((PROV["hadDictionaryMember"], m_entity)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
455 coll_b_attribs.append((ORE["aggregates"], m_b)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
456 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
457 coll.add_attributes(coll_attribs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
458 coll_b.add_attributes(coll_b_attribs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
459 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
460 # Also Save ORE Folder as annotation metadata |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
461 ore_doc = ProvDocument() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
462 ore_doc.add_namespace(ORE) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
463 ore_doc.add_namespace(RO) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
464 ore_doc.add_namespace(UUID) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
465 ore_doc.add_bundle(dir_bundle) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
466 ore_doc = ore_doc.flattened() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
467 ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
468 with self.research_object.write_bag_file(ore_doc_path) as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
469 ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
470 self.research_object.add_annotation( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
471 dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
472 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
473 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
474 if is_empty: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
475 # Empty directory |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
476 coll.add_asserted_type(PROV["EmptyCollection"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
477 coll.add_asserted_type(PROV["EmptyDictionary"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
478 self.research_object.add_uri(coll.identifier.uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
479 return coll |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
480 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
481 def declare_string(self, value: str) -> Tuple[ProvEntity, str]: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
482 """Save as string in UTF-8.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
483 byte_s = BytesIO(str(value).encode(ENCODING)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
484 data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
485 checksum = PurePosixPath(data_file).name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
486 # FIXME: Don't naively assume add_data_file uses hash in filename! |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
487 data_id = "data:%s" % PurePosixPath(data_file).stem |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
488 entity = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
489 data_id, {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
490 ) # type: ProvEntity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
491 return entity, checksum |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
492 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
493 def declare_artefact(self, value: Optional[CWLOutputType]) -> ProvEntity: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
494 """Create data artefact entities for all file objects.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
495 if value is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
496 # FIXME: If this can happen in CWL, we'll |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
497 # need a better way to represent this in PROV |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
498 return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
499 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
500 if isinstance(value, (bool, int, float)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
501 # Typically used in job documents for flags |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
502 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
503 # FIXME: Make consistent hash URIs for these |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
504 # that somehow include the type |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
505 # (so "1" != 1 != "1.0" != true) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
506 entity = self.document.entity(uuid.uuid4().urn, {PROV_VALUE: value}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
507 self.research_object.add_uri(entity.identifier.uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
508 return entity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
509 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
510 if isinstance(value, (str, str)): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
511 (entity, _) = self.declare_string(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
512 return entity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
513 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
514 if isinstance(value, bytes): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
515 # If we got here then we must be in Python 3 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
516 byte_s = BytesIO(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
517 data_file = self.research_object.add_data_file(byte_s) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
518 # FIXME: Don't naively assume add_data_file uses hash in filename! |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
519 data_id = "data:%s" % PurePosixPath(data_file).stem |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
520 return self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
521 data_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
522 {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)}, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
523 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
524 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
525 if isinstance(value, MutableMapping): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
526 if "@id" in value: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
527 # Already processed this value, but it might not be in this PROV |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
528 entities = self.document.get_record(value["@id"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
529 if entities: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
530 return entities[0] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
531 # else, unknown in PROV, re-add below as if it's fresh |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
532 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
533 # Base case - we found a File we need to update |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
534 if value.get("class") == "File": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
535 (entity, _, _) = self.declare_file(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
536 value["@id"] = entity.identifier.uri |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
537 return entity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
538 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
539 if value.get("class") == "Directory": |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
540 entity = self.declare_directory(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
541 value["@id"] = entity.identifier.uri |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
542 return entity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
543 coll_id = value.setdefault("@id", uuid.uuid4().urn) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
544 # some other kind of dictionary? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
545 # TODO: also Save as JSON |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
546 coll = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
547 coll_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
548 [ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
549 (PROV_TYPE, WFPROV["Artifact"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
550 (PROV_TYPE, PROV["Collection"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
551 (PROV_TYPE, PROV["Dictionary"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
552 ], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
553 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
554 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
555 if value.get("class"): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
556 _logger.warning("Unknown data class %s.", value["class"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
557 # FIXME: The class might be "http://example.com/somethingelse" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
558 coll.add_asserted_type(CWLPROV[value["class"]]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
559 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
560 # Let's iterate and recurse |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
561 coll_attribs = [] # type: List[Tuple[Identifier, ProvEntity]] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
562 for (key, val) in value.items(): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
563 v_ent = self.declare_artefact(val) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
564 self.document.membership(coll, v_ent) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
565 m_entity = self.document.entity(uuid.uuid4().urn) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
566 # Note: only support PROV-O style dictionary |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
567 # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
568 # as prov.py do not easily allow PROV-N extensions |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
569 m_entity.add_asserted_type(PROV["KeyEntityPair"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
570 m_entity.add_attributes( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
571 {PROV["pairKey"]: str(key), PROV["pairEntity"]: v_ent} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
572 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
573 coll_attribs.append((PROV["hadDictionaryMember"], m_entity)) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
574 coll.add_attributes(coll_attribs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
575 self.research_object.add_uri(coll.identifier.uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
576 return coll |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
577 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
578 # some other kind of Collection? |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
579 # TODO: also save as JSON |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
580 try: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
581 members = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
582 for each_input_obj in iter(value): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
583 # Recurse and register any nested objects |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
584 e = self.declare_artefact(each_input_obj) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
585 members.append(e) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
586 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
587 # If we reached this, then we were allowed to iterate |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
588 coll = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
589 uuid.uuid4().urn, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
590 [ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
591 (PROV_TYPE, WFPROV["Artifact"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
592 (PROV_TYPE, PROV["Collection"]), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
593 ], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
594 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
595 if not members: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
596 coll.add_asserted_type(PROV["EmptyCollection"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
597 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
598 for member in members: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
599 # FIXME: This won't preserve order, for that |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
600 # we would need to use PROV.Dictionary |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
601 # with numeric keys |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
602 self.document.membership(coll, member) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
603 self.research_object.add_uri(coll.identifier.uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
604 # FIXME: list value does not support adding "@id" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
605 return coll |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
606 except TypeError: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
607 _logger.warning("Unrecognized type %s of %r", type(value), value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
608 # Let's just fall back to Python repr() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
609 entity = self.document.entity(uuid.uuid4().urn, {PROV_LABEL: repr(value)}) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
610 self.research_object.add_uri(entity.identifier.uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
611 return entity |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
612 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
613 def used_artefacts( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
614 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
615 job_order: Union[CWLObjectType, List[CWLObjectType]], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
616 process_run_id: str, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
617 name: Optional[str] = None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
618 ) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
619 """Add used() for each data artefact.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
620 if isinstance(job_order, list): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
621 for entry in job_order: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
622 self.used_artefacts(entry, process_run_id, name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
623 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
624 # FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
625 base = "main" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
626 if name is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
627 base += "/" + name |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
628 for key, value in job_order.items(): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
629 prov_role = self.wf_ns[f"{base}/{key}"] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
630 try: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
631 entity = self.declare_artefact(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
632 self.document.used( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
633 process_run_id, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
634 entity, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
635 datetime.datetime.now(), |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
636 None, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
637 {"prov:role": prov_role}, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
638 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
639 except OSError: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
640 pass |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
641 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
642 def generate_output_prov( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
643 self, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
644 final_output: Union[CWLObjectType, MutableSequence[CWLObjectType], None], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
645 process_run_id: Optional[str], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
646 name: Optional[str], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
647 ) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
648 """Call wasGeneratedBy() for each output,copy the files into the RO.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
649 if isinstance(final_output, MutableSequence): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
650 for entry in final_output: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
651 self.generate_output_prov(entry, process_run_id, name) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
652 elif final_output is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
653 # Timestamp should be created at the earliest |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
654 timestamp = datetime.datetime.now() |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
655 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
656 # For each output, find/register the corresponding |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
657 # entity (UUID) and document it as generated in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
658 # a role corresponding to the output |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
659 for output, value in final_output.items(): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
660 entity = self.declare_artefact(value) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
661 if name is not None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
662 name = urllib.parse.quote(str(name), safe=":/,#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
663 # FIXME: Probably not "main" in nested workflows |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
664 role = self.wf_ns[f"main/{name}/{output}"] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
665 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
666 role = self.wf_ns["main/%s" % output] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
667 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
668 if not process_run_id: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
669 process_run_id = self.workflow_run_uri |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
670 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
671 self.document.wasGeneratedBy( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
672 entity, process_run_id, timestamp, None, {"prov:role": role} |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
673 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
674 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
675 def prospective_prov(self, job: JobsType) -> None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
676 """Create prospective prov recording as wfdesc prov:Plan.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
677 if not isinstance(job, WorkflowJob): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
678 # direct command line tool execution |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
679 self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
680 "wf:main", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
681 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
682 PROV_TYPE: WFDESC["Process"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
683 "prov:type": PROV["Plan"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
684 "prov:label": "Prospective provenance", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
685 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
686 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
687 return |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
688 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
689 self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
690 "wf:main", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
691 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
692 PROV_TYPE: WFDESC["Workflow"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
693 "prov:type": PROV["Plan"], |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
694 "prov:label": "Prospective provenance", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
695 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
696 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
697 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
698 for step in job.steps: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
699 stepnametemp = "wf:main/" + str(step.name)[5:] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
700 stepname = urllib.parse.quote(stepnametemp, safe=":/,#") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
701 provstep = self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
702 stepname, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
703 {PROV_TYPE: WFDESC["Process"], "prov:type": PROV["Plan"]}, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
704 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
705 self.document.entity( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
706 "wf:main", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
707 { |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
708 "wfdesc:hasSubProcess": provstep, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
709 "prov:label": "Prospective provenance", |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
710 }, |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
711 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
712 # TODO: Declare roles/parameters as well |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
713 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
714 def activity_has_provenance(self, activity, prov_ids): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
715 # type: (str, List[Identifier]) -> None |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
716 """Add http://www.w3.org/TR/prov-aq/ relations to nested PROV files.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
717 # NOTE: The below will only work if the corresponding metadata/provenance arcp URI |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
718 # is a pre-registered namespace in the PROV Document |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
719 attribs = [(PROV["has_provenance"], prov_id) for prov_id in prov_ids] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
720 self.document.activity(activity, other_attributes=attribs) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
721 # Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
722 # as prov:mentionOf() is only for entities, not activities |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
723 uris = [i.uri for i in prov_ids] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
724 self.research_object.add_annotation(activity, uris, PROV["has_provenance"].uri) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
725 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
726 def finalize_prov_profile(self, name): |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
727 # type: (Optional[str]) -> List[Identifier] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
728 """Transfer the provenance related files to the RO.""" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
729 # NOTE: Relative posix path |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
730 if name is None: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
731 # main workflow, fixed filenames |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
732 filename = "primary.cwlprov" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
733 else: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
734 # ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
735 wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
736 # Note that the above could cause overlaps for similarly named |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
737 # workflows, but that's OK as we'll also include run uuid |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
738 # which also covers thhe case of this step being run in |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
739 # multiple places or iterations |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
740 filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
741 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
742 basename = str(PurePosixPath(PROVENANCE) / filename) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
743 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
744 # TODO: Also support other profiles than CWLProv, e.g. ProvOne |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
745 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
746 # list of prov identifiers of provenance files |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
747 prov_ids = [] |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
748 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
749 # https://www.w3.org/TR/prov-xml/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
750 with self.research_object.write_bag_file(basename + ".xml") as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
751 self.document.serialize(provenance_file, format="xml", indent=4) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
752 prov_ids.append(self.provenance_ns[filename + ".xml"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
753 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
754 # https://www.w3.org/TR/prov-n/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
755 with self.research_object.write_bag_file( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
756 basename + ".provn" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
757 ) as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
758 self.document.serialize(provenance_file, format="provn", indent=2) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
759 prov_ids.append(self.provenance_ns[filename + ".provn"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
760 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
761 # https://www.w3.org/Submission/prov-json/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
762 with self.research_object.write_bag_file(basename + ".json") as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
763 self.document.serialize(provenance_file, format="json", indent=2) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
764 prov_ids.append(self.provenance_ns[filename + ".json"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
765 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
766 # "rdf" aka https://www.w3.org/TR/prov-o/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
767 # which can be serialized to ttl/nt/jsonld (and more!) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
768 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
769 # https://www.w3.org/TR/turtle/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
770 with self.research_object.write_bag_file(basename + ".ttl") as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
771 self.document.serialize(provenance_file, format="rdf", rdf_format="turtle") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
772 prov_ids.append(self.provenance_ns[filename + ".ttl"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
773 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
774 # https://www.w3.org/TR/n-triples/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
775 with self.research_object.write_bag_file(basename + ".nt") as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
776 self.document.serialize( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
777 provenance_file, format="rdf", rdf_format="ntriples" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
778 ) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
779 prov_ids.append(self.provenance_ns[filename + ".nt"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
780 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
781 # https://www.w3.org/TR/json-ld/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
782 # TODO: Use a nice JSON-LD context |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
783 # see also https://eprints.soton.ac.uk/395985/ |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
784 # 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
785 with self.research_object.write_bag_file( |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
786 basename + ".jsonld" |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
787 ) as provenance_file: |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
788 self.document.serialize(provenance_file, format="rdf", rdf_format="json-ld") |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
789 prov_ids.append(self.provenance_ns[filename + ".jsonld"]) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
790 |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
791 _logger.debug("[provenance] added provenance: %s", prov_ids) |
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset
|
792 return prov_ids |