Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/lxml/includes/libxml/HTMLparser.h @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 /* | |
2 * Summary: interface for an HTML 4.0 non-verifying parser | |
3 * Description: this module implements an HTML 4.0 non-verifying parser | |
4 * with an API compatible with that of the XML parser. It should | |
5 * be able to parse "real world" HTML, even if it is severely | |
6 * broken from a specification point of view. | |
7 * | |
8 * Copy: See Copyright for the status of this software. | |
9 * | |
10 * Author: Daniel Veillard | |
11 */ | |
12 | |
13 #ifndef __HTML_PARSER_H__ | |
14 #define __HTML_PARSER_H__ | |
15 #include <libxml/xmlversion.h> | |
16 #include <libxml/parser.h> | |
17 | |
18 #ifdef LIBXML_HTML_ENABLED | |
19 | |
20 #ifdef __cplusplus | |
21 extern "C" { | |
22 #endif | |
23 | |
24 /* | |
25 * Most of the back-end structures from XML and HTML are shared. | |
26 */ | |
27 typedef xmlParserCtxt htmlParserCtxt; | |
28 typedef xmlParserCtxtPtr htmlParserCtxtPtr; | |
29 typedef xmlParserNodeInfo htmlParserNodeInfo; | |
30 typedef xmlSAXHandler htmlSAXHandler; | |
31 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; | |
32 typedef xmlParserInput htmlParserInput; | |
33 typedef xmlParserInputPtr htmlParserInputPtr; | |
34 typedef xmlDocPtr htmlDocPtr; | |
35 typedef xmlNodePtr htmlNodePtr; | |
36 | |
37 /* | |
38 * Internal description of an HTML element, representing HTML 4.01 | |
39 * and XHTML 1.0 (which share the same structure). | |
40 */ | |
41 typedef struct _htmlElemDesc htmlElemDesc; | |
42 typedef htmlElemDesc *htmlElemDescPtr; | |
43 struct _htmlElemDesc { | |
44 const char *name; /* The tag name */ | |
45 char startTag; /* Whether the start tag can be implied */ | |
46 char endTag; /* Whether the end tag can be implied */ | |
47 char saveEndTag; /* Whether the end tag should be saved */ | |
48 char empty; /* Whether this is an empty element */ | |
49 char depr; /* Whether this is a deprecated element */ | |
50 char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD */ | |
51 char isinline; /* 0: block element, 1: inline element */ | |
52 const char *desc; /* The description */ | |
53 | |
54 /* NRK Jan.2003 | |
55 * New fields encapsulating HTML structure | |
56 * | |
57 * Bugs: | |
58 * This is a very limited representation. It fails to tell us when | |
59 * an element *requires* subelements (we only know whether they are | |
60 * allowed or not), and it doesn't tell us where CDATA and PCDATA | |
61 * are allowed. Some element relationships are not fully represented: | |
62 * these are flagged with the word MODIFIER. | |
63 */ | |
64 const char** subelts; /* Allowed sub-elements of this element */ | |
65 const char* defaultsubelt; /* Subelement suggested for auto-repair | |
66 if necessary, or NULL */ | |
67 const char** attrs_opt; /* Optional attributes */ | |
68 const char** attrs_depr; /* Additional deprecated attributes */ | |
69 const char** attrs_req; /* Required attributes */ | |
70 }; | |
71 | |
72 /* | |
73 * Internal description of an HTML entity. | |
74 */ | |
75 typedef struct _htmlEntityDesc htmlEntityDesc; | |
76 typedef htmlEntityDesc *htmlEntityDescPtr; | |
77 struct _htmlEntityDesc { | |
78 unsigned int value; /* The Unicode value of the character */ | |
79 const char *name; /* The entity name */ | |
80 const char *desc; /* The description */ | |
81 }; | |
82 | |
83 /* | |
84 * There are only a few public functions. | |
85 */ | |
86 XMLPUBFUN const htmlElemDesc * XMLCALL | |
87 htmlTagLookup (const xmlChar *tag); | |
88 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
89 htmlEntityLookup(const xmlChar *name); | |
90 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
91 htmlEntityValueLookup(unsigned int value); | |
92 | |
93 XMLPUBFUN int XMLCALL | |
94 htmlIsAutoClosed(htmlDocPtr doc, | |
95 htmlNodePtr elem); | |
96 XMLPUBFUN int XMLCALL | |
97 htmlAutoCloseTag(htmlDocPtr doc, | |
98 const xmlChar *name, | |
99 htmlNodePtr elem); | |
100 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
101 htmlParseEntityRef(htmlParserCtxtPtr ctxt, | |
102 const xmlChar **str); | |
103 XMLPUBFUN int XMLCALL | |
104 htmlParseCharRef(htmlParserCtxtPtr ctxt); | |
105 XMLPUBFUN void XMLCALL | |
106 htmlParseElement(htmlParserCtxtPtr ctxt); | |
107 | |
108 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
109 htmlNewParserCtxt(void); | |
110 | |
111 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
112 htmlCreateMemoryParserCtxt(const char *buffer, | |
113 int size); | |
114 | |
115 XMLPUBFUN int XMLCALL | |
116 htmlParseDocument(htmlParserCtxtPtr ctxt); | |
117 XMLPUBFUN htmlDocPtr XMLCALL | |
118 htmlSAXParseDoc (const xmlChar *cur, | |
119 const char *encoding, | |
120 htmlSAXHandlerPtr sax, | |
121 void *userData); | |
122 XMLPUBFUN htmlDocPtr XMLCALL | |
123 htmlParseDoc (const xmlChar *cur, | |
124 const char *encoding); | |
125 XMLPUBFUN htmlDocPtr XMLCALL | |
126 htmlSAXParseFile(const char *filename, | |
127 const char *encoding, | |
128 htmlSAXHandlerPtr sax, | |
129 void *userData); | |
130 XMLPUBFUN htmlDocPtr XMLCALL | |
131 htmlParseFile (const char *filename, | |
132 const char *encoding); | |
133 XMLPUBFUN int XMLCALL | |
134 UTF8ToHtml (unsigned char *out, | |
135 int *outlen, | |
136 const unsigned char *in, | |
137 int *inlen); | |
138 XMLPUBFUN int XMLCALL | |
139 htmlEncodeEntities(unsigned char *out, | |
140 int *outlen, | |
141 const unsigned char *in, | |
142 int *inlen, int quoteChar); | |
143 XMLPUBFUN int XMLCALL | |
144 htmlIsScriptAttribute(const xmlChar *name); | |
145 XMLPUBFUN int XMLCALL | |
146 htmlHandleOmittedElem(int val); | |
147 | |
148 #ifdef LIBXML_PUSH_ENABLED | |
149 /** | |
150 * Interfaces for the Push mode. | |
151 */ | |
152 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
153 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, | |
154 void *user_data, | |
155 const char *chunk, | |
156 int size, | |
157 const char *filename, | |
158 xmlCharEncoding enc); | |
159 XMLPUBFUN int XMLCALL | |
160 htmlParseChunk (htmlParserCtxtPtr ctxt, | |
161 const char *chunk, | |
162 int size, | |
163 int terminate); | |
164 #endif /* LIBXML_PUSH_ENABLED */ | |
165 | |
166 XMLPUBFUN void XMLCALL | |
167 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); | |
168 | |
169 /* | |
170 * New set of simpler/more flexible APIs | |
171 */ | |
172 /** | |
173 * htmlParserOption: | |
174 * | |
175 * This is the set of HTML parser options that can be passed down | |
176 * to htmlReadDoc() and similar calls, or to htmlCtxtUseOptions(). | |
177 */ | |
178 typedef enum { | |
179 HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ | |
180 HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ | |
181 HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ | |
182 HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ | |
183 HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ | |
184 HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ | |
185 HTML_PARSE_NONET = 1<<11,/* Forbid network access */ | |
186 HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */ | |
187 HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */ | |
188 HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */ | |
189 } htmlParserOption; | |
190 | |
191 XMLPUBFUN void XMLCALL | |
192 htmlCtxtReset (htmlParserCtxtPtr ctxt); | |
193 XMLPUBFUN int XMLCALL | |
194 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, | |
195 int options); | |
196 XMLPUBFUN htmlDocPtr XMLCALL | |
197 htmlReadDoc (const xmlChar *cur, | |
198 const char *URL, | |
199 const char *encoding, | |
200 int options); | |
201 XMLPUBFUN htmlDocPtr XMLCALL | |
202 htmlReadFile (const char *URL, | |
203 const char *encoding, | |
204 int options); | |
205 XMLPUBFUN htmlDocPtr XMLCALL | |
206 htmlReadMemory (const char *buffer, | |
207 int size, | |
208 const char *URL, | |
209 const char *encoding, | |
210 int options); | |
211 XMLPUBFUN htmlDocPtr XMLCALL | |
212 htmlReadFd (int fd, | |
213 const char *URL, | |
214 const char *encoding, | |
215 int options); | |
216 XMLPUBFUN htmlDocPtr XMLCALL | |
217 htmlReadIO (xmlInputReadCallback ioread, | |
218 xmlInputCloseCallback ioclose, | |
219 void *ioctx, | |
220 const char *URL, | |
221 const char *encoding, | |
222 int options); | |
223 XMLPUBFUN htmlDocPtr XMLCALL | |
224 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, | |
225 const xmlChar *cur, | |
226 const char *URL, | |
227 const char *encoding, | |
228 int options); | |
229 XMLPUBFUN htmlDocPtr XMLCALL | |
230 htmlCtxtReadFile (xmlParserCtxtPtr ctxt, | |
231 const char *filename, | |
232 const char *encoding, | |
233 int options); | |
234 XMLPUBFUN htmlDocPtr XMLCALL | |
235 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt, | |
236 const char *buffer, | |
237 int size, | |
238 const char *URL, | |
239 const char *encoding, | |
240 int options); | |
241 XMLPUBFUN htmlDocPtr XMLCALL | |
242 htmlCtxtReadFd (xmlParserCtxtPtr ctxt, | |
243 int fd, | |
244 const char *URL, | |
245 const char *encoding, | |
246 int options); | |
247 XMLPUBFUN htmlDocPtr XMLCALL | |
248 htmlCtxtReadIO (xmlParserCtxtPtr ctxt, | |
249 xmlInputReadCallback ioread, | |
250 xmlInputCloseCallback ioclose, | |
251 void *ioctx, | |
252 const char *URL, | |
253 const char *encoding, | |
254 int options); | |
255 | |
256 /* NRK/Jan2003: further knowledge of HTML structure | |
257 */ | |
258 typedef enum { | |
259 HTML_NA = 0 , /* something we don't check at all */ | |
260 HTML_INVALID = 0x1 , | |
261 HTML_DEPRECATED = 0x2 , | |
262 HTML_VALID = 0x4 , | |
263 HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */ | |
264 } htmlStatus ; | |
265 | |
266 /* Using htmlElemDesc rather than name here, to emphasise the fact | |
267 that otherwise there's a lookup overhead | |
268 */ | |
269 XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ; | |
270 XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; | |
271 XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ; | |
272 XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ; | |
273 /** | |
274 * htmlDefaultSubelement: | |
275 * @elt: HTML element description (any pointer expression; it is parenthesized in the expansion) | |
276 * | |
277 * Returns the default subelement for this element (its defaultsubelt field) | |
278 */ | |
279 #define htmlDefaultSubelement(elt) ((elt)->defaultsubelt) | |
280 /** | |
281 * htmlElementAllowedHereDesc: | |
282 * @parent: HTML parent element | |
283 * @elt: HTML element | |
284 * | |
285 * Checks whether an HTML element description may be a | |
286 * direct child of the specified element. | |
287 * | |
288 * Returns 1 if allowed; 0 otherwise. | |
289 */ | |
290 #define htmlElementAllowedHereDesc(parent,elt) \ | |
291 htmlElementAllowedHere((parent), (elt)->name) | |
292 /** | |
293 * htmlRequiredAttrs: | |
294 * @elt: HTML element description (any pointer expression; it is parenthesized in the expansion) | |
295 * | |
296 * Returns the attributes required for the specified element (its attrs_req field). | |
297 */ | |
298 #define htmlRequiredAttrs(elt) ((elt)->attrs_req) | |
299 | |
300 | |
301 #ifdef __cplusplus | |
302 } | |
303 #endif | |
304 | |
305 #endif /* LIBXML_HTML_ENABLED */ | |
306 #endif /* __HTML_PARSER_H__ */ |