comparison env/lib/python3.7/site-packages/planemo/lint.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """Utilities to help linting various targets."""
2 from __future__ import absolute_import
3
4 import os
5
6 import requests
7 from galaxy.tool_util.lint import LintContext
8 from six.moves.urllib.request import urlopen
9
10 import planemo.linters.biocontainer_registered
11 import planemo.linters.conda_requirements
12 import planemo.linters.doi
13 import planemo.linters.urls
14 import planemo.linters.xsd
15 from planemo.io import error
16 from planemo.shed import find_urls_for_xml
17 from planemo.xml import validation
18
19
def build_lint_args(ctx, **kwds):
    """Handle common report, error, and skip linting arguments.

    :param ctx: planemo context; only ``ctx.global_config`` is read, and only
        when no ``skip`` keyword is supplied.
    :param kwds: lint options. ``report_level`` (default ``"all"``),
        ``fail_level`` (default ``"warn"``) and ``skip`` are consumed here;
        the complete set is also forwarded to ``_lint_extra_modules`` to
        select the optional linters.
    :returns: dict with the keys ``level``, ``fail_level``, ``extra_modules``
        and ``skip_types``.
    """
    report_level = kwds.get("report_level", "all")
    fail_level = kwds.get("fail_level", "warn")
    skip = kwds.get("skip", None)
    if skip is None:
        # ``or ""`` also guards against an explicit null value for the key.
        skip = ctx.global_config.get("lint_skip", "") or ""
    # Accept a sequence of names from either source, not just a
    # comma-separated string.
    if isinstance(skip, (list, tuple)):
        skip = ",".join(skip)

    # Drop blank entries so an empty setting yields [] rather than [""].
    skip_types = [s.strip() for s in skip.split(",") if s.strip()]
    lint_args = dict(
        level=report_level,
        fail_level=fail_level,
        extra_modules=_lint_extra_modules(**kwds),
        skip_types=skip_types,
    )
    return lint_args
38
39
40 # TODO: Move this back to tool_lint.
def _lint_extra_modules(**kwds):
    """Collect the optional linter modules switched on through ``kwds``.

    Every linter is toggled by one keyword; only ``xsd`` is enabled when its
    keyword is absent. The result keeps a fixed order: xsd, doi, urls,
    conda_requirements, biocontainer_registered.
    """
    # (keyword, default, attribute name under ``planemo.linters``)
    toggles = (
        ("xsd", True, "xsd"),
        ("doi", False, "doi"),
        ("urls", False, "urls"),
        ("conda_requirements", False, "conda_requirements"),
        ("biocontainer", False, "biocontainer_registered"),
    )
    return [
        getattr(planemo.linters, attr)
        for key, default, attr in toggles
        if kwds.get(key, default)
    ]
59
60
def setup_lint(ctx, **kwds):
    """Prepare the lint arguments and a fresh ``LintContext`` for a target.

    Returns a ``(lint_args, lint_ctx)`` tuple; the context is created with
    the report level stored under ``lint_args["level"]``.
    """
    args = build_lint_args(ctx, **kwds)
    return args, LintContext(args["level"])
66
67
def handle_lint_complete(lint_ctx, lint_args, failed=False):
    """Finish linting a target and work out the exit code.

    A truthy ``failed`` forces a failure. Otherwise ``lint_ctx`` is asked
    whether anything reached ``lint_args["fail_level"]``. A failure is
    reported through ``error`` and yields 1; success yields 0.
    """
    failed = failed or lint_ctx.failed(lint_args["fail_level"])
    if not failed:
        return 0
    error("Failed linting")
    return 1
75
76
def lint_dois(tool_xml, lint_ctx):
    """Find the DOIs cited in ``tool_xml`` and check each one with https://doi.org.

    Each citation is handed to ``is_doi``, which records its findings on
    ``lint_ctx``; nothing is returned.
    """
    for doi in find_dois_for_xml(tool_xml):
        is_doi(doi, lint_ctx)
82
83
def find_dois_for_xml(tool_xml):
    """Return the text of every DOI citation in a parsed tool document.

    Only ``citation`` elements with ``type="doi"`` that sit directly inside a
    ``citations`` child of the root element are considered. The element text
    is returned untouched, so an empty element contributes ``None``.
    """
    root = tool_xml.getroot()
    return [
        entry.text
        for block in root.findall("citations")
        for entry in block
        if entry.tag == 'citation' and entry.attrib.get('type', '') == 'doi'
    ]
91
92
def is_doi(publication_id, lint_ctx):
    """Check if doi.org knows about the ``publication_id``.

    :param publication_id: citation text, optionally carrying a leading
        ``doi:`` prefix; ``None`` and blank values are reported as empty.
    :param lint_ctx: lint context receiving the ``info``/``warn``/``error``
        message describing the outcome.
    :returns: ``None``; the result is only recorded on ``lint_ctx``.
    """
    base_url = "https://doi.org"
    prefix = "doi:"
    # Seconds to wait for doi.org so an unresponsive server cannot hang linting.
    timeout = 30
    if publication_id is None:
        lint_ctx.error('Empty DOI citation')
        return
    publication_id = publication_id.strip()
    # Only a *leading* "doi:" is a prefix; splitting on it anywhere would
    # truncate an identifier that merely contains that text.
    if publication_id.startswith(prefix):
        doiless_publication_id = publication_id[len(prefix):].strip()
    else:
        doiless_publication_id = publication_id
    if not doiless_publication_id:
        lint_ctx.error('Empty DOI citation')
        return
    url = "%s/%s" % (base_url, doiless_publication_id)
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # A network problem says nothing about the DOI itself, so warn
        # instead of aborting the lint run with a traceback.
        lint_ctx.warn("Error '%s' checking DOI %s" % (e, publication_id))
        return
    if r.status_code == 200:
        if publication_id != doiless_publication_id:
            lint_ctx.error("%s is valid, but Galaxy expects DOI without 'doi:' prefix" % publication_id)
        else:
            lint_ctx.info("%s is a valid DOI" % publication_id)
    elif r.status_code == 404:
        lint_ctx.error("%s is not a valid DOI" % publication_id)
    else:
        lint_ctx.warn("dx.doi returned unexpected status code %d" % r.status_code)
115
116
def lint_xsd(lint_ctx, schema_path, path):
    """Validate the XML file at ``path`` against the schema at ``schema_path``.

    The outcome is recorded on ``lint_ctx``: an info message when the file
    validates, otherwise an error naming the linted object (or the file's
    base name when the context has no object name) with the validator output.
    """
    name = lint_ctx.object_name or os.path.basename(path)
    result = validation.get_validator(require=True).validate(schema_path, path)
    if result.passed:
        lint_ctx.info("File validates against XML schema.")
        return
    lint_ctx.error(
        "Invalid XML found in file: %s. Errors [%s]" % (name, result.output)
    )
128
129
def lint_urls(root, lint_ctx):
    """Find referenced URLs and verify they are valid.

    :param root: parsed tool XML handed to ``find_urls_for_xml``, which
        returns two collections of URLs (``urls`` and ``docs``).
    :param lint_ctx: lint context; each URL produces either an info
        ("URL OK") or an error message on it.

    URLs from ``docs`` are fetched with a browser-like User-Agent header,
    the others with the default headers.
    """
    urls, docs = find_urls_for_xml(root)

    # This is from Google Chrome on macOS, current at time of writing:
    BROWSER_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36"
    # Seconds to wait on a server before giving up, so one dead host cannot
    # hang the whole lint run.
    request_timeout = 30

    def validate_url(url, lint_ctx, user_agent=None):
        is_valid = True
        if url.startswith(('http://', 'https://')):
            if user_agent:
                headers = {"User-Agent": user_agent, 'Accept': '*/*'}
            else:
                headers = None
            r = None
            try:
                r = requests.get(url, headers=headers, stream=True, timeout=request_timeout)
                r.raise_for_status()
                # Pull at most one chunk to prove the body is readable; the
                # default keeps an empty body from raising StopIteration.
                next(r.iter_content(1000), None)
            except Exception as e:
                # Compare against None explicitly: a Response is falsy for
                # any 4xx/5xx status, so ``if r and ...`` never matched 429.
                too_many_requests = r is not None and r.status_code == 429
                if not too_many_requests:
                    is_valid = False
                    lint_ctx.error("Error '%s' accessing %s" % (e, url))
            finally:
                # stream=True keeps the connection open until it is closed.
                if r is not None:
                    r.close()
        else:
            try:
                with urlopen(url, timeout=request_timeout) as handle:
                    handle.read(100)
            except Exception as e:
                is_valid = False
                lint_ctx.error("Error '%s' accessing %s" % (e, url))
        if is_valid:
            lint_ctx.info("URL OK %s" % url)

    for url in urls:
        validate_url(url, lint_ctx)
    for url in docs:
        validate_url(url, lint_ctx, BROWSER_USER_AGENT)
170
171
# Names exported by a star-import of this module.
__all__ = (
    "build_lint_args",
    "handle_lint_complete",
    "lint_dois",
    "lint_urls",
    "lint_xsd",
)