module Nokogumbo
Constants
- DEFAULT_MAX_ATTRIBUTES
The default maximum number of attributes per element.
- DEFAULT_MAX_ERRORS
The default maximum number of errors for parsing a document or a fragment.
- DEFAULT_MAX_TREE_DEPTH
The default maximum depth of the DOM tree produced by parsing a document or fragment.
- LINE_SUPPORTED
- VERSION
Public Class Methods
fragment(doc_fragment, tags, ctx, max_attributes, max_errors, max_depth)
click to toggle source
static VALUE fragment (
VALUE self,
VALUE doc_fragment,
VALUE tags,
VALUE ctx,
VALUE max_attributes,
VALUE max_errors,
VALUE max_depth
) {
ID name = rb_intern_const("name");
const char *ctx_tag;
GumboNamespaceEnum ctx_ns;
GumboQuirksModeEnum quirks_mode;
bool form = false;
const char *encoding = NULL;
if (NIL_P(ctx)) {
ctx_tag = "body";
ctx_ns = GUMBO_NAMESPACE_HTML;
} else if (TYPE(ctx) == T_STRING) {
ctx_tag = StringValueCStr(ctx);
ctx_ns = GUMBO_NAMESPACE_HTML;
size_t len = RSTRING_LEN(ctx);
const char *colon = memchr(ctx_tag, ':', len);
if (colon) {
switch (colon - ctx_tag) {
case 3:
if (st_strncasecmp(ctx_tag, "svg", 3) != 0)
goto error;
ctx_ns = GUMBO_NAMESPACE_SVG;
break;
case 4:
if (st_strncasecmp(ctx_tag, "html", 4) == 0)
ctx_ns = GUMBO_NAMESPACE_HTML;
else if (st_strncasecmp(ctx_tag, "math", 4) == 0)
ctx_ns = GUMBO_NAMESPACE_MATHML;
else
goto error;
break;
default:
error:
rb_raise(rb_eArgError, "Invalid context namespace '%*s'", (int)(colon - ctx_tag), ctx_tag);
}
ctx_tag = colon+1;
} else {
// For convenience, put 'svg' and 'math' in their namespaces.
if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0)
ctx_ns = GUMBO_NAMESPACE_SVG;
else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0)
ctx_ns = GUMBO_NAMESPACE_MATHML;
}
// Check if it's a form.
form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
} else {
ID element_ = rb_intern_const("element?");
// Context fragment name.
VALUE tag_name = rb_funcall(ctx, name, 0);
assert(RTEST(tag_name));
Check_Type(tag_name, T_STRING);
ctx_tag = StringValueCStr(tag_name);
// Context fragment namespace.
ctx_ns = lookup_namespace(ctx, true);
// Check for a form ancestor, including self.
for (VALUE node = ctx;
!NIL_P(node);
node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
if (!RTEST(rb_funcall(node, element_, 0)))
continue;
VALUE element_name = rb_funcall(node, name, 0);
if (RSTRING_LEN(element_name) == 4
&& !st_strcasecmp(RSTRING_PTR(element_name), "form")
&& lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
form = true;
break;
}
}
// Encoding.
if (RSTRING_LEN(tag_name) == 14
&& !st_strcasecmp(ctx_tag, "annotation-xml")) {
VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
rb_utf8_str_new_static("encoding", 8));
if (RTEST(enc)) {
Check_Type(enc, T_STRING);
encoding = StringValueCStr(enc);
}
}
}
// Quirks mode.
VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
VALUE dtd = rb_funcall(doc, internal_subset, 0);
if (NIL_P(dtd)) {
quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
} else {
VALUE dtd_name = rb_funcall(dtd, name, 0);
VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
quirks_mode = gumbo_compute_quirks_mode (
NIL_P(dtd_name)? NULL:StringValueCStr(dtd_name),
NIL_P(pubid)? NULL:StringValueCStr(pubid),
NIL_P(sysid)? NULL:StringValueCStr(sysid)
);
}
// Perform a fragment parse.
int depth = NUM2INT(max_depth);
GumboOptions options = kGumboDefaultOptions;
options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
// Add one to account for the HTML element.
options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
options.fragment_context = ctx_tag;
options.fragment_namespace = ctx_ns;
options.fragment_encoding = encoding;
options.quirks_mode = quirks_mode;
options.fragment_context_has_form_ancestor = form;
GumboOutput *output = perform_parse(&options, tags);
ParseArgs args = {
.output = output,
.input = tags,
.url_or_frag = doc_fragment,
.doc = (xmlDocPtr)extract_xml_node(doc),
};
VALUE parse_args = wrap_parse_args(&args);
rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
return Qnil;
}
parse(input, url, max_attributes, max_errors, max_depth)
click to toggle source
static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
GumboOptions options = kGumboDefaultOptions;
options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
options.max_tree_depth = NUM2INT(max_depth);
GumboOutput *output = perform_parse(&options, input);
ParseArgs args = {
.output = output,
.input = input,
.url_or_frag = url,
.doc = NIL,
};
VALUE parse_args = wrap_parse_args(&args);
return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args);
}