Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/utils/punctuation_chars.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 # :Id: $Id: punctuation_chars.py 8016 2017-01-17 15:06:17Z milde $ | |
4 # :Copyright: © 2011, 2017 Günter Milde. | |
5 # :License: Released under the terms of the `2-Clause BSD license`_, in short: | |
6 # | |
7 # Copying and distribution of this file, with or without modification, | |
8 # are permitted in any medium without royalty provided the copyright | |
9 # notice and this notice are preserved. | |
10 # This file is offered as-is, without any warranty. | |
11 # | |
12 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause | |
13 # | |
14 # This file is generated by | |
15 # ``docutils/tools/dev/generate_punctuation_chars.py``. | |
16 # :: | |
17 | |
18 import sys, re | |
19 import unicodedata | |
20 | |
21 """Docutils character category patterns. | |
22 | |
23 Patterns for the implementation of the `inline markup recognition rules`_ | |
24 in the reStructuredText parser `docutils.parsers.rst.states.py` based | |
25 on Unicode character categories. | |
26 The patterns are used inside ``[ ]`` in regular expressions. | |
27 | |
28 Rule (5) requires determination of matching open/close pairs. However, the | |
29 pairing of open/close quotes is ambiguous due to different typographic | |
30 conventions in different languages. The ``match_chars`` function tests | |
31 whether two characters form an open/close pair. | |
32 | |
33 The patterns are generated by | |
34 ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence | |
35 on the Python version and avoid the time-consuming generation with every | |
36 Docutils run. See there for motives and implementation details. | |
37 | |
38 The category of some characters changed with the development of the | |
39 Unicode standard. The current lists are generated with the help of the | |
40 "unicodedata" module of Python 2.7.13 (based on Unicode version 5.2.0). | |
41 | |
42 .. _inline markup recognition rules: | |
43 http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules | |
44 """ | |
45 | |
# All four strings below are written as the *contents* of a regular
# expression character class (they are used inside ``[ ]``), which is why
# ``[``, ``]``, ``-`` and the backslash appear in escaped form.

# Opening characters.  Order is significant: `match_chars` pairs the
# character at ``openers[i]`` with the one at ``closers[i]``.
# Note that U+301D is listed twice (two consecutive entries) so that it lines
# up with both U+301E and U+301F in `closers`.
openers = (u'"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768'
           u'\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea'
           u'\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991'
           u'\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28'
           u'\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d'
           u'\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41'
           u'\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62'
           u'\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20'
           u'\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d'
           u'\u2e1d\u2e21\u201b\u201f')
# Closing characters, position-aligned with `openers` (same length, same
# order), so index ``i`` here is the partner of ``openers[i]``.
closers = (u'"\')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769'
           u'\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb'
           u'\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992'
           u'\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29'
           u'\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e'
           u'\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42'
           u'\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63'
           u'\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21'
           u'\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c'
           u'\u2e1c\u2e20\u201a\u201e')
# Delimiter characters.  Inside a character class ``a-b`` denotes a range,
# so entries such as ``\u055a-\u055f`` cover a whole span of code points; the
# leading ``\\-`` is an escaped, literal hyphen.
delimiters = (u'\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589'
              u'\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c'
              u'\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d'
              u'\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f'
              u'\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f'
              u'\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735'
              u'\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945'
              u'\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-'
              u'\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-'
              u'\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-'
              u'\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00'
              u'\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-'
              u'\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0'
              u'\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7'
              u'\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f'
              u'\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb'
              u'\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c'
              u'\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a'
              u'\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a'
              u'\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65')
# Delimiters beyond U+FFFF are appended only when the interpreter reports
# the full Unicode range through ``sys.maxunicode``.
if sys.maxunicode >= 0x10FFFF: # "wide" build
    delimiters += (u'\U00010100\U00010101\U0001039f\U000103d0\U00010857'
                   u'\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f'
                   u'\U00010b39-\U00010b3f\U000110bb\U000110bc\U000110be-'
                   u'\U000110c1\U00012470-\U00012473')
# Closing-only delimiters.  The string holds two backslashes followed by
# ``.,;!?``; in a character class the backslash pair is one literal backslash.
closing_delimiters = u'\\\\.,;!?'
92 | |
93 | |
94 # Matching open/close quotes | |
95 # -------------------------- | |
96 | |
# Language-specific quote pairings that the positional `openers`/`closers`
# tables cannot express.  Each key is an opening quote; its value is a string
# containing every *additional* closing character accepted for that opener.
quote_pairs = {
    # Swedish: the same glyph opens and closes.
    u'\xbb': u'\xbb',               # » ... »
    u'\u2019': u'\u2019',           # ’ ... ’
    u'\u201d': u'\u201d',           # ” ... ”
    u'\u203a': u'\u203a',           # › ... ›
    # Albanian/Greek/Turkish: high opening quote, low closing quote.
    u'\u2018': u'\u201a',           # ‘ ... ‚
    u'\u201c': u'\u201e',           # “ ... „
    # German (first closer) and Polish (second closer): low opening quote.
    u'\u201a': u'\u2018\u2019',     # ‚ ... ‘   or   ‚ ... ’
    u'\u201e': u'\u201c\u201d',     # „ ... “   or   „ ... ”
}
"""Additional open/close quote pairs."""
108 | |
def match_chars(c1, c2):
    """Test whether `c1` and `c2` are a matching open/close character pair.

    Matching open/close pairs are at the same position in
    `punctuation_chars.openers` and `punctuation_chars.closers`.
    The pairing of open/close quotes is ambiguous due to different
    typographic conventions in different languages,
    so we test for additional matches stored in `quote_pairs`.

    :Parameters:
        - `c1`: candidate opening character (a one-character string).
        - `c2`: candidate closing character (a one-character string).

    :Returns: ``True`` if `c2` closes `c1`, else ``False``.  Anything that
        is not exactly one character long never matches.
    """
    # Only single characters can form a pair.  Without this guard the
    # substring semantics of ``in`` would make an empty `c2` match every
    # opener and let multi-character strings match by accident.
    if len(c1) != 1 or len(c2) != 1:
        return False
    # An opener may be listed more than once so that it can pair with several
    # closers (U+301D lines up with both U+301E and U+301F), hence every
    # aligned position is examined rather than only the first occurrence.
    for opener, closer in zip(openers, closers):
        if opener == c1 and closer == c2:
            return True
    # Language-specific quote pairs apply only to known opening characters.
    return c1 in openers and c2 in quote_pairs.get(c1, u'')