Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/lxml/includes/libxml/encoding.h @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 /* | |
2 * Summary: interface for the encoding conversion functions | |
3 * Description: interface for the encoding conversion functions needed for | |
4 * XML basic encoding and iconv() support. | |
5 * | |
6 * Related specs are | |
7 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies | |
8 * [ISO-10646] UTF-8 and UTF-16 in Annexes | |
9 * [ISO-8859-1] ISO Latin-1 characters codes. | |
10 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- | |
11 * Worldwide Character Encoding -- Version 1.0", Addison- | |
12 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is | |
13 * described in Unicode Technical Report #4. | |
14 * [US-ASCII] Coded Character Set--7-bit American Standard Code for | |
15 * Information Interchange, ANSI X3.4-1986. | |
16 * | |
17 * Copy: See Copyright for the status of this software. | |
18 * | |
19 * Author: Daniel Veillard | |
20 */ | |
21 | |
22 #ifndef __XML_CHAR_ENCODING_H__ | |
23 #define __XML_CHAR_ENCODING_H__ | |
24 | |
25 #include <libxml/xmlversion.h> | |
26 | |
27 #ifdef LIBXML_ICONV_ENABLED | |
28 #include <iconv.h> | |
29 #endif | |
30 #ifdef LIBXML_ICU_ENABLED | |
31 #include <unicode/ucnv.h> | |
32 #endif | |
33 #ifdef __cplusplus | |
34 extern "C" { | |
35 #endif | |
36 | |
37 /* | |
38 * xmlCharEncoding: | |
39 * | |
40 * Predefined values for some standard encodings. | |
41 * Libxml does not do beforehand translation on UTF8 and ISOLatinX. | |
42 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. | |
43 * | |
44 * Anything else would have to be translated to UTF8 before being | |
45 * given to the parser itself. The BOM for UTF16 and the encoding | |
46 * declaration are looked at and a converter is looked for at that | |
47 * point. If not found the parser stops here as asked by the XML REC. A | |
48 * converter can be registered by the user using xmlRegisterCharEncodingHandler | |
49 * but the current form doesn't allow stateful transcoding (a serious | |
50 * problem agreed !). If iconv has been found it will be used | |
51 * automatically and allow stateful transcoding, the simplest is then | |
52 * to be sure to enable iconv and to provide iconv libs for the encoding | |
53 * support needed. | |
54 * | |
55 * Note that the generic "UTF-16" is not a predefined value. Instead, only | |
56 * the specific UTF-16LE and UTF-16BE are present. | |
57 */ | |
58 typedef enum { | |
59 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ | |
60 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ | |
61 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ | |
62 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ | |
63 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ | |
64 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ | |
65 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ | |
66 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ | |
67 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ | |
68 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ | |
69 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ | |
70 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ | |
71 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ | |
72 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ | |
73 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ | |
74 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ | |
75 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ | |
76 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ | |
77 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ | |
78 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ | |
79 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ | |
80 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ | |
81 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ | |
82 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ | |
83 } xmlCharEncoding; | |
84 | |
85 /** | |
86 * xmlCharEncodingInputFunc: | |
87 * @out: a pointer to an array of bytes to store the UTF-8 result | |
88 * @outlen: the length of @out | |
89 * @in: a pointer to an array of chars in the original encoding | |
90 * @inlen: the length of @in | |
91 * | |
92 * Take a block of chars in the original encoding and try to convert | |
93 * it to a UTF-8 block of chars out. | |
94 * | |
95 * Returns the number of bytes written, -1 if lack of space, or -2 | |
96 * if the transcoding failed. | |
97 * The value of @inlen after return is the number of octets consumed | |
98 * if the return value is positive, else unpredictable. | |
99 * The value of @outlen after return is the number of octets produced. | |
100 */ | |
101 typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, | |
102 const unsigned char *in, int *inlen); | |
103 | |
104 | |
105 /** | |
106 * xmlCharEncodingOutputFunc: | |
107 * @out: a pointer to an array of bytes to store the result | |
108 * @outlen: the length of @out | |
109 * @in: a pointer to an array of UTF-8 chars | |
110 * @inlen: the length of @in | |
111 * | |
112 * Take a block of UTF-8 chars in and try to convert it to another | |
113 * encoding. | |
114 * Note: a first call designed to produce heading info is called with | |
115 * in = NULL. If stateful this should also initialize the encoder state. | |
116 * | |
117 * Returns the number of bytes written, -1 if lack of space, or -2 | |
118 * if the transcoding failed. | |
119 * The value of @inlen after return is the number of octets consumed | |
120 * if the return value is positive, else unpredictable. | |
121 * The value of @outlen after return is the number of octets produced. | |
122 */ | |
123 typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | |
124 const unsigned char *in, int *inlen); | |
125 | |
126 | |
127 /* | |
128 * Block defining the handlers for non UTF-8 encodings. | |
129 * If iconv and/or ICU is supported, there are two extra fields for each. | |
130 */ | |
131 #ifdef LIBXML_ICU_ENABLED | |
132 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */ | |
133 #define ICU_PIVOT_BUF_SIZE 1024 | |
134 struct _uconv_t { | |
135 UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
136 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
137 UChar pivot_buf[ICU_PIVOT_BUF_SIZE]; | |
138 UChar *pivot_source; | |
139 UChar *pivot_target; | |
140 }; | |
141 typedef struct _uconv_t uconv_t; | |
142 #endif | |
143 | |
144 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
145 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
146 struct _xmlCharEncodingHandler { | |
147 char *name; | |
148 xmlCharEncodingInputFunc input; | |
149 xmlCharEncodingOutputFunc output; | |
150 #ifdef LIBXML_ICONV_ENABLED | |
151 iconv_t iconv_in; | |
152 iconv_t iconv_out; | |
153 #endif /* LIBXML_ICONV_ENABLED */ | |
154 #ifdef LIBXML_ICU_ENABLED | |
155 uconv_t *uconv_in; | |
156 uconv_t *uconv_out; | |
157 #endif /* LIBXML_ICU_ENABLED */ | |
158 }; | |
159 | |
160 #ifdef __cplusplus | |
161 } | |
162 #endif | |
163 #include <libxml/tree.h> | |
164 #ifdef __cplusplus | |
165 extern "C" { | |
166 #endif | |
167 | |
168 /* | |
169 * Interfaces for encoding handlers. | |
170 */ | |
171 XMLPUBFUN void XMLCALL | |
172 xmlInitCharEncodingHandlers (void); | |
173 XMLPUBFUN void XMLCALL | |
174 xmlCleanupCharEncodingHandlers (void); | |
175 XMLPUBFUN void XMLCALL | |
176 xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); | |
177 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
178 xmlGetCharEncodingHandler (xmlCharEncoding enc); | |
179 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
180 xmlFindCharEncodingHandler (const char *name); | |
181 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
182 xmlNewCharEncodingHandler (const char *name, | |
183 xmlCharEncodingInputFunc input, | |
184 xmlCharEncodingOutputFunc output); | |
185 | |
186 /* | |
187 * Interfaces for encoding names and aliases. | |
188 */ | |
189 XMLPUBFUN int XMLCALL | |
190 xmlAddEncodingAlias (const char *name, | |
191 const char *alias); | |
192 XMLPUBFUN int XMLCALL | |
193 xmlDelEncodingAlias (const char *alias); | |
194 XMLPUBFUN const char * XMLCALL | |
195 xmlGetEncodingAlias (const char *alias); | |
196 XMLPUBFUN void XMLCALL | |
197 xmlCleanupEncodingAliases (void); | |
198 XMLPUBFUN xmlCharEncoding XMLCALL | |
199 xmlParseCharEncoding (const char *name); | |
200 XMLPUBFUN const char * XMLCALL | |
201 xmlGetCharEncodingName (xmlCharEncoding enc); | |
202 | |
203 /* | |
204 * Interfaces directly used by the parsers. | |
205 */ | |
206 XMLPUBFUN xmlCharEncoding XMLCALL | |
207 xmlDetectCharEncoding (const unsigned char *in, | |
208 int len); | |
209 | |
210 XMLPUBFUN int XMLCALL | |
211 xmlCharEncOutFunc (xmlCharEncodingHandler *handler, | |
212 xmlBufferPtr out, | |
213 xmlBufferPtr in); | |
214 | |
215 XMLPUBFUN int XMLCALL | |
216 xmlCharEncInFunc (xmlCharEncodingHandler *handler, | |
217 xmlBufferPtr out, | |
218 xmlBufferPtr in); | |
219 XMLPUBFUN int XMLCALL | |
220 xmlCharEncFirstLine (xmlCharEncodingHandler *handler, | |
221 xmlBufferPtr out, | |
222 xmlBufferPtr in); | |
223 XMLPUBFUN int XMLCALL | |
224 xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); | |
225 | |
226 /* | |
227 * Export a few useful functions | |
228 */ | |
229 #ifdef LIBXML_OUTPUT_ENABLED | |
230 XMLPUBFUN int XMLCALL | |
231 UTF8Toisolat1 (unsigned char *out, | |
232 int *outlen, | |
233 const unsigned char *in, | |
234 int *inlen); | |
235 #endif /* LIBXML_OUTPUT_ENABLED */ | |
236 XMLPUBFUN int XMLCALL | |
237 isolat1ToUTF8 (unsigned char *out, | |
238 int *outlen, | |
239 const unsigned char *in, | |
240 int *inlen); | |
241 #ifdef __cplusplus | |
242 } | |
243 #endif | |
244 | |
245 #endif /* __XML_CHAR_ENCODING_H__ */ |