lements->current_node_is( 'FORM' ) ) { // @todo Indicate a parse error once it's possible. This error does not impact the logic here. } $this->state->stack_of_open_elements->pop_until( 'FORM' ); return true; } break; /* * > An end tag whose tag name is "p" */ case '-P': if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->insert_html_element( $this->state->current_token ); } $this->close_a_p_element(); return true; /* * > An end tag whose tag name is "li" * > An end tag whose tag name is one of: "dd", "dt" */ case '-DD': case '-DT': case '-LI': if ( /* * An end tag whose tag name is "li": * If the stack of open elements does not have an li element in list item scope, * then this is a parse error; ignore the token. */ ( 'LI' === $token_name && ! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' ) ) || /* * An end tag whose tag name is one of: "dd", "dt": * If the stack of open elements does not have an element in scope that is an * HTML element with the same tag name as that of the token, then this is a * parse error; ignore the token. */ ( 'LI' !== $token_name && ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) ) { /* * This is a parse error, ignore the token. * * @todo Indicate a parse error once it's possible. */ return $this->step(); } $this->generate_implied_end_tags( $token_name ); if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { // @todo Indicate a parse error once it's possible. This error does not impact the logic here. } $this->state->stack_of_open_elements->pop_until( $token_name ); return true; /* * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" */ case '-H1': case '-H2': case '-H3': case '-H4': case '-H5': case '-H6': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { /* * This is a parse error; ignore the token. 
* * @todo Indicate a parse error once it's possible. */ return $this->step(); } $this->generate_implied_end_tags(); if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { // @todo Record parse error: this error doesn't impact parsing. } $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); return true; /* * > A start tag whose tag name is "a" */ case '+A': foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { switch ( $item->node_name ) { case 'marker': break 2; case 'A': $this->run_adoption_agency_algorithm(); $this->state->active_formatting_elements->remove_node( $item ); $this->state->stack_of_open_elements->remove_node( $item ); break 2; } } $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; /* * > A start tag whose tag name is one of: "b", "big", "code", "em", "font", "i", * > "s", "small", "strike", "strong", "tt", "u" */ case '+B': case '+BIG': case '+CODE': case '+EM': case '+FONT': case '+I': case '+S': case '+SMALL': case '+STRIKE': case '+STRONG': case '+TT': case '+U': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; /* * > A start tag whose tag name is "nobr" */ case '+NOBR': $this->reconstruct_active_formatting_elements(); if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) { // Parse error. 
$this->run_adoption_agency_algorithm(); $this->reconstruct_active_formatting_elements(); } $this->insert_html_element( $this->state->current_token ); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; /* * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i", * > "nobr", "s", "small", "strike", "strong", "tt", "u" */ case '-A': case '-B': case '-BIG': case '-CODE': case '-EM': case '-FONT': case '-I': case '-NOBR': case '-S': case '-SMALL': case '-STRIKE': case '-STRONG': case '-TT': case '-U': $this->run_adoption_agency_algorithm(); return true; /* * > A start tag whose tag name is one of: "applet", "marquee", "object" */ case '+APPLET': case '+MARQUEE': case '+OBJECT': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); $this->state->active_formatting_elements->insert_marker(); $this->state->frameset_ok = false; return true; /* * > A end tag token whose tag name is one of: "applet", "marquee", "object" */ case '-APPLET': case '-MARQUEE': case '-OBJECT': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { // Parse error: ignore the token. return $this->step(); } $this->generate_implied_end_tags(); if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { // This is a parse error. } $this->state->stack_of_open_elements->pop_until( $token_name ); $this->state->active_formatting_elements->clear_up_to_last_marker(); return true; /* * > A start tag whose tag name is "table" */ case '+TABLE': /* * > If the Document is not set to quirks mode, and the stack of open elements * > has a p element in button scope, then close a p element. 
*/ if ( WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return true; /* * > An end tag whose tag name is "br" * * This is prevented from happening because the Tag Processor * reports all closing BR tags as if they were opening tags. */ /* * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr" */ case '+AREA': case '+BR': case '+EMBED': case '+IMG': case '+KEYGEN': case '+WBR': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; return true; /* * > A start tag whose tag name is "input" */ case '+INPUT': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); /* * > If the token does not have an attribute with the name "type", or if it does, * > but that attribute's value is not an ASCII case-insensitive match for the * > string "hidden", then: set the frameset-ok flag to "not ok". */ $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { $this->state->frameset_ok = false; } return true; /* * > A start tag whose tag name is one of: "param", "source", "track" */ case '+PARAM': case '+SOURCE': case '+TRACK': $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is "hr" */ case '+HR': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; return true; /* * > A start tag whose tag name is "image" */ case '+IMAGE': /* * > Parse error. 
Change the token's tag name to "img" and reprocess it. (Don't ask.) * * Note that this is handled elsewhere, so it should not be possible to reach this code. */ $this->bail( "Cannot process an IMAGE tag. (Don't ask.)" ); break; /* * > A start tag whose tag name is "textarea" */ case '+TEXTAREA': $this->insert_html_element( $this->state->current_token ); /* * > If the next token is a U+000A LINE FEED (LF) character token, then ignore * > that token and move on to the next one. (Newlines at the start of * > textarea elements are ignored as an authoring convenience.) * * This is handled in `get_modifiable_text()`. */ $this->state->frameset_ok = false; /* * > Switch the insertion mode to "text". * * As a self-contained node, this behavior is handled in the Tag Processor. */ return true; /* * > A start tag whose tag name is "xmp" */ case '+XMP': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } $this->reconstruct_active_formatting_elements(); $this->state->frameset_ok = false; /* * > Follow the generic raw text element parsing algorithm. * * As a self-contained node, this behavior is handled in the Tag Processor. */ $this->insert_html_element( $this->state->current_token ); return true; /* * A start tag whose tag name is "iframe" */ case '+IFRAME': $this->state->frameset_ok = false; /* * > Follow the generic raw text element parsing algorithm. * * As a self-contained node, this behavior is handled in the Tag Processor. */ $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is "noembed" * > A start tag whose tag name is "noscript", if the scripting flag is enabled * * The scripting flag is never enabled in this parser. 
*/ case '+NOEMBED': $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is "select" */ case '+SELECT': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; switch ( $this->state->insertion_mode ) { /* * > If the insertion mode is one of "in table", "in caption", "in table body", "in row", * > or "in cell", then switch the insertion mode to "in select in table". */ case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; break; /* * > Otherwise, switch the insertion mode to "in select". */ default: $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; break; } return true; /* * > A start tag whose tag name is one of: "optgroup", "option" */ case '+OPTGROUP': case '+OPTION': if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) { $this->state->stack_of_open_elements->pop(); } $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is one of: "rb", "rtc" */ case '+RB': case '+RTC': if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { $this->generate_implied_end_tags(); if ( $this->state->stack_of_open_elements->current_node_is( 'RUBY' ) ) { // @todo Indicate a parse error once it's possible. 
} } $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is one of: "rp", "rt" */ case '+RP': case '+RT': if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { $this->generate_implied_end_tags( 'RTC' ); $current_node_name = $this->state->stack_of_open_elements->current_node()->node_name; if ( 'RTC' === $current_node_name || 'RUBY' === $current_node_name ) { // @todo Indicate a parse error once it's possible. } } $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is "math" */ case '+MATH': $this->reconstruct_active_formatting_elements(); /* * @todo Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.) * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.) * * These ought to be handled in the attribute methods. */ $this->state->current_token->namespace = 'math'; $this->insert_html_element( $this->state->current_token ); if ( $this->state->current_token->has_self_closing_flag ) { $this->state->stack_of_open_elements->pop(); } return true; /* * > A start tag whose tag name is "svg" */ case '+SVG': $this->reconstruct_active_formatting_elements(); /* * @todo Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.) * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.) * * These ought to be handled in the attribute methods. 
*/ $this->state->current_token->namespace = 'svg'; $this->insert_html_element( $this->state->current_token ); if ( $this->state->current_token->has_self_closing_flag ) { $this->state->stack_of_open_elements->pop(); } return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", * > "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" */ case '+CAPTION': case '+COL': case '+COLGROUP': case '+FRAME': case '+HEAD': case '+TBODY': case '+TD': case '+TFOOT': case '+TH': case '+THEAD': case '+TR': // Parse error. Ignore the token. return $this->step(); } if ( ! parent::is_tag_closer() ) { /* * > Any other start tag */ $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; } else { /* * > Any other end tag */ /* * Find the corresponding tag opener in the stack of open elements, if * it exists before reaching a special element, which provides a kind * of boundary in the stack. For example, a `` should not * close anything beyond its containing `P` or `DIV` element. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { if ( 'html' === $node->namespace && $token_name === $node->node_name ) { break; } if ( self::is_special( $node ) ) { // This is a parse error, ignore the token. return $this->step(); } } $this->generate_implied_end_tags( $token_name ); if ( $node !== $this->state->stack_of_open_elements->current_node() ) { // @todo Record parse error: this error doesn't impact parsing. } foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { $this->state->stack_of_open_elements->pop(); if ( $node === $item ) { return true; } } } $this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' ); // This unnecessary return prevents tools from inaccurately reporting type errors. return false; } /** * Parses next element in the 'in table' insertion mode. 
* * This internal function performs the 'in table' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intable * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_table(): bool { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; switch ( $op ) { /* * > A character token, if the current node is table, * > tbody, template, tfoot, thead, or tr element */ case '#text': $current_node = $this->state->stack_of_open_elements->current_node(); $current_node_name = $current_node ? $current_node->node_name : null; if ( $current_node_name && ( 'TABLE' === $current_node_name || 'TBODY' === $current_node_name || 'TEMPLATE' === $current_node_name || 'TFOOT' === $current_node_name || 'THEAD' === $current_node_name || 'TR' === $current_node_name ) ) { /* * If the text is empty after processing HTML entities and stripping * U+0000 NULL bytes then ignore the token. */ if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) { return $this->step(); } /* * This follows the rules for "in table text" insertion mode. * * Whitespace-only text nodes are inserted in-place. Otherwise * foster parenting is enabled and the nodes would be * inserted out-of-place. * * > If any of the tokens in the pending table character tokens * > list are character tokens that are not ASCII whitespace, * > then this is a parse error: reprocess the character tokens * > in the pending table character tokens list using the rules * > given in the "anything else" entry in the "in table" * > insertion mode. * > * > Otherwise, insert the characters given by the pending table * > character tokens list. 
* * @see https://html.spec.whatwg.org/#parsing-main-intabletext */ if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { $this->insert_html_element( $this->state->current_token ); return true; } // Non-whitespace would trigger fostering, unsupported at this time. $this->bail( 'Foster parenting is not supported.' ); break; } break; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->insert_html_element( $this->state->current_token ); return true; /* * > A DOCTYPE token */ case 'html': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "caption" */ case '+CAPTION': $this->state->stack_of_open_elements->clear_to_table_context(); $this->state->active_formatting_elements->insert_marker(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; return true; /* * > A start tag whose tag name is "colgroup" */ case '+COLGROUP': $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return true; /* * > A start tag whose tag name is "col" */ case '+COL': $this->state->stack_of_open_elements->clear_to_table_context(); /* * > Insert an HTML element for a "colgroup" start tag token with no attributes, * > then switch the insertion mode to "in column group". 
*/ $this->insert_virtual_node( 'COLGROUP' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ case '+TBODY': case '+TFOOT': case '+THEAD': $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return true; /* * > A start tag whose tag name is one of: "td", "th", "tr" */ case '+TD': case '+TH': case '+TR': $this->state->stack_of_open_elements->clear_to_table_context(); /* * > Insert an HTML element for a "tbody" start tag token with no attributes, * > then switch the insertion mode to "in table body". */ $this->insert_virtual_node( 'TBODY' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is "table" * * This tag in the IN TABLE insertion mode is a parse error. */ case '+TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { return $this->step(); } $this->state->stack_of_open_elements->pop_until( 'TABLE' ); $this->reset_insertion_mode_appropriately(); return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is "table" */ case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { // @todo Indicate a parse error once it's possible. 
return $this->step(); } $this->state->stack_of_open_elements->pop_until( 'TABLE' ); $this->reset_insertion_mode_appropriately(); return true; /* * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ case '-BODY': case '-CAPTION': case '-COL': case '-COLGROUP': case '-HTML': case '-TBODY': case '-TD': case '-TFOOT': case '-TH': case '-THEAD': case '-TR': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is one of: "style", "script", "template" * > An end tag whose tag name is "template" */ case '+STYLE': case '+SCRIPT': case '+TEMPLATE': case '-TEMPLATE': /* * > Process the token using the rules for the "in head" insertion mode. */ return $this->step_in_head(); /* * > A start tag whose tag name is "input" * * > If the token does not have an attribute with the name "type", or if it does, but * > that attribute's value is not an ASCII case-insensitive match for the string * > "hidden", then: act as described in the "anything else" entry below. */ case '+INPUT': $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { goto anything_else; } // @todo Indicate a parse error once it's possible. $this->insert_html_element( $this->state->current_token ); return true; /* * > A start tag whose tag name is "form" * * This tag in the IN TABLE insertion mode is a parse error. */ case '+FORM': if ( $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || isset( $this->state->form_element ) ) { return $this->step(); } // This FORM is special because it immediately closes and cannot have other children. $this->insert_html_element( $this->state->current_token ); $this->state->form_element = $this->state->current_token; $this->state->stack_of_open_elements->pop(); return true; } /* * > Anything else * > Parse error. 
Enable foster parenting, process the token using the rules for the * > "in body" insertion mode, and then disable foster parenting. * * @todo Indicate a parse error once it's possible. */ anything_else: $this->bail( 'Foster parenting is not supported.' ); } /** * Parses next element in the 'in table text' insertion mode. * * This internal function performs the 'in table text' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intabletext * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_table_text(): bool { $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT . ' state.' ); } /** * Parses next element in the 'in caption' insertion mode. * * This internal function performs the 'in caption' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-incaption * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_caption(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > An end tag whose tag name is "caption" * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr" * > An end tag whose tag name is "table" * * These tag handling rules are identical except for the final instruction. * Handle them in a single block. */ case '-CAPTION': case '+CAPTION': case '+COL': case '+COLGROUP': case '+TBODY': case '+TD': case '+TFOOT': case '+TH': case '+THEAD': case '+TR': case '-TABLE': if ( ! 
$this->state->stack_of_open_elements->has_element_in_table_scope( 'CAPTION' ) ) { // Parse error: ignore the token. return $this->step(); } $this->generate_implied_end_tags(); if ( ! $this->state->stack_of_open_elements->current_node_is( 'CAPTION' ) ) { // @todo Indicate a parse error once it's possible. } $this->state->stack_of_open_elements->pop_until( 'CAPTION' ); $this->state->active_formatting_elements->clear_up_to_last_marker(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; // If this is not a CAPTION end tag, the token should be reprocessed. if ( '-CAPTION' === $op ) { return true; } return $this->step( self::REPROCESS_CURRENT_NODE ); /** * > An end tag whose tag name is one of: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ case '-BODY': case '-COL': case '-COLGROUP': case '-HTML': case '-TBODY': case '-TD': case '-TFOOT': case '-TH': case '-THEAD': case '-TR': // Parse error: ignore the token. return $this->step(); } /** * > Anything else * > Process the token using the rules for the "in body" insertion mode. */ return $this->step_in_body(); } /** * Parses next element in the 'in column group' insertion mode. * * This internal function performs the 'in column group' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-incolgroup * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_column_group(): bool { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? 
'-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; switch ( $op ) { /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { // Insert the character. $this->insert_html_element( $this->state->current_token ); return true; } goto in_column_group_anything_else; break; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->insert_html_element( $this->state->current_token ); return true; /* * > A DOCTYPE token */ case 'html': // @todo Indicate a parse error once it's possible. return $this->step(); /* * > A start tag whose tag name is "html" */ case '+HTML': return $this->step_in_body(); /* * > A start tag whose tag name is "col" */ case '+COL': $this->insert_html_element( $this->state->current_token ); $this->state->stack_of_open_elements->pop(); return true; /* * > An end tag whose tag name is "colgroup" */ case '-COLGROUP': if ( ! $this->state->stack_of_open_elements->current_node_is( 'COLGROUP' ) ) { // @todo Indicate a parse error once it's possible. return $this->step(); } $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return true; /* * > An end tag whose tag name is "col" */ case '-COL': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "template" * > An end tag whose tag name is "template" */ case '+TEMPLATE': case '-TEMPLATE': return $this->step_in_head(); } in_column_group_anything_else: /* * > Anything else */ if ( ! $this->state->stack_of_open_elements->current_node_is( 'COLGROUP' ) ) { // @todo Indicate a parse error once it's possible. 
return $this->step(); } $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return $this->step( self::REPROCESS_CURRENT_NODE ); } /** * Parses next element in the 'in table body' insertion mode. * * This internal function performs the 'in table body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intbody * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_table_body(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A start tag whose tag name is "tr" */ case '+TR': $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return true; /* * > A start tag whose tag name is one of: "th", "td" */ case '+TH': case '+TD': // @todo Indicate a parse error once it's possible. $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->insert_virtual_node( 'TR' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ case '-TBODY': case '-TFOOT': case '-THEAD': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { // Parse error: ignore the token. 
return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead" * > An end tag whose tag name is "table" */ case '+CAPTION': case '+COL': case '+COLGROUP': case '+TBODY': case '+TFOOT': case '+THEAD': case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TBODY' ) && ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'THEAD' ) && ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TFOOT' ) ) { // Parse error: ignore the token. return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th", "tr" */ case '-BODY': case '-CAPTION': case '-COL': case '-COLGROUP': case '-HTML': case '-TD': case '-TH': case '-TR': // Parse error: ignore the token. return $this->step(); } /* * > Anything else * > Process the token using the rules for the "in table" insertion mode. */ return $this->step_in_table(); } /** * Parses next element in the 'in row' insertion mode. * * This internal function performs the 'in row' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intr * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_row(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? 
'-' : '+'; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A start tag whose tag name is one of: "th", "td" */ case '+TH': case '+TD': $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; $this->state->active_formatting_elements->insert_marker(); return true; /* * > An end tag whose tag name is "tr" */ case '-TR': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { // Parse error: ignore the token. return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" * > An end tag whose tag name is "table" */ case '+CAPTION': case '+COL': case '+COLGROUP': case '+TBODY': case '+TFOOT': case '+THEAD': case '+TR': case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { // Parse error: ignore the token. return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ case '-TBODY': case '-TFOOT': case '-THEAD': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { // Parse error: ignore the token. return $this->step(); } if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { // Ignore the token. 
return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th" */ case '-BODY': case '-CAPTION': case '-COL': case '-COLGROUP': case '-HTML': case '-TD': case '-TH': // Parse error: ignore the token. return $this->step(); } /* * > Anything else * > Process the token using the rules for the "in table" insertion mode. */ return $this->step_in_table(); } /** * Parses next element in the 'in cell' insertion mode. * * This internal function performs the 'in cell' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intd * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_cell(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > An end tag whose tag name is one of: "td", "th" */ case '-TD': case '-TH': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { // Parse error: ignore the token. return $this->step(); } $this->generate_implied_end_tags(); /* * @todo This needs to check if the current node is an HTML element, meaning that * when SVG and MathML support is added, this needs to differentiate between an * HTML element of the given name, such as `
` stops at tags `TABLE`,
* `TBODY`, `TR`, and `TD`. The `TBODY` and `TR` tags do not appear in
* the original HTML and cannot be used as bookmarks.
*
* @since 6.4.0
*
* @param string $bookmark_name Identifies this particular bookmark.
* @return bool Whether the bookmark was successfully created.
*/
public function set_bookmark( $bookmark_name ): bool {
	/*
	 * Virtual tokens do not appear in the original HTML text, which leaves
	 * nothing for a bookmark to point at. Refuse the request and report the
	 * misuse to the developer instead of creating a meaningless bookmark.
	 */
	if ( $this->is_virtual() ) {
		_doing_it_wrong(
			__METHOD__,
			__( 'Cannot set bookmarks on tokens that do not appear in the original HTML text.' ),
			'6.8.0'
		);
		return false;
	}

	// The bookmark is stored under an underscore-prefixed name, the same form has_bookmark() looks up.
	return parent::set_bookmark( "_{$bookmark_name}" );
}
/**
 * Reports whether a bookmark of the given name has been set.
 *
 * @since 6.5.0
 *
 * @param string $bookmark_name Name of the bookmark to look for.
 * @return bool Whether a bookmark with that name exists.
 */
public function has_bookmark( $bookmark_name ): bool {
	// The parent class is asked about the underscore-prefixed form of the name.
	$prefixed_name = "_{$bookmark_name}";

	return parent::has_bookmark( $prefixed_name );
}
/*
* HTML Parsing Algorithms
*/
/**
 * Runs the "close a p element" algorithm.
 *
 * Implied end tags are generated for everything except a P element,
 * then the stack of open elements is popped until a P has been removed.
 *
 * @since 6.4.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#close-a-p-element
 */
private function close_a_p_element(): void {
	$this->generate_implied_end_tags( 'P' );

	$open_elements = $this->state->stack_of_open_elements;
	$open_elements->pop_until( 'P' );
}
/**
 * Closes elements that have implied end tags.
 *
 * Pops nodes off of the stack of open elements for as long as the current
 * node is one of the elements with an implied end tag. Popping stops early
 * when the current node matches the excluded element, if one was given, and
 * also when the stack has no current node at all.
 *
 * @since 6.4.0
 * @since 6.7.0 Full spec support.
 *
 * @see https://html.spec.whatwg.org/#generate-implied-end-tags
 *
 * @param string|null $except_for_this_element Perform as if this element doesn't exist in the stack of open elements.
 */
private function generate_implied_end_tags( ?string $except_for_this_element = null ): void {
	$elements_with_implied_end_tags = array(
		'DD',
		'DT',
		'LI',
		'OPTGROUP',
		'OPTION',
		'P',
		'RB',
		'RP',
		'RT',
		'RTC',
	);

	$open_elements = $this->state->stack_of_open_elements;

	while ( true ) {
		$current_node = $open_elements->current_node();

		// The current node is nullable; stop rather than read a property from null.
		if ( null === $current_node ) {
			return;
		}

		// The excluded element is left in place, as are any nodes above it.
		if ( null !== $except_for_this_element && $open_elements->current_node_is( $except_for_this_element ) ) {
			return;
		}

		if ( ! in_array( $current_node->node_name, $elements_with_implied_end_tags, true ) ) {
			return;
		}

		$open_elements->pop();
	}
}
/**
 * Closes elements that have implied end tags, thoroughly.
 *
 * Works like the normal generation of implied end tags but takes no
 * exclusion and closes a larger set of elements, including table
 * sections, rows, cells, captions, and column groups. See the HTML
 * specification for an explanation of why the two differ.
 *
 * Popping stops as soon as the current node is not in that set, and
 * also when the stack has no current node at all.
 *
 * @since 6.4.0
 * @since 6.7.0 Full spec support.
 *
 * @see WP_HTML_Processor::generate_implied_end_tags
 * @see https://html.spec.whatwg.org/#generate-implied-end-tags
 */
private function generate_implied_end_tags_thoroughly(): void {
	$elements_with_implied_end_tags = array(
		'CAPTION',
		'COLGROUP',
		'DD',
		'DT',
		'LI',
		'OPTGROUP',
		'OPTION',
		'P',
		'RB',
		'RP',
		'RT',
		'RTC',
		'TBODY',
		'TD',
		'TFOOT',
		'TH',
		'THEAD',
		'TR',
	);

	$open_elements = $this->state->stack_of_open_elements;

	while ( true ) {
		$current_node = $open_elements->current_node();

		// The current node is nullable; stop rather than read a property from null.
		if ( null === $current_node ) {
			return;
		}

		if ( ! in_array( $current_node->node_name, $elements_with_implied_end_tags, true ) ) {
			return;
		}

		$open_elements->pop();
	}
}
/**
 * Returns the adjusted current node.
 *
 * > The adjusted current node is the context element if the parser was created as
 * > part of the HTML fragment parsing algorithm and the stack of open elements
 * > has only one element in it (fragment case); otherwise, the adjusted current
 * > node is the current node.
 *
 * @see https://html.spec.whatwg.org/#adjusted-current-node
 *
 * @since 6.7.0
 *
 * @return WP_HTML_Token|null The context node in the fragment case, otherwise whatever
 *                            the stack of open elements reports as its current node.
 */
private function get_adjusted_current_node(): ?WP_HTML_Token {
	$open_elements = $this->state->stack_of_open_elements;

	// Fragment case: a context node exists and exactly one element is open.
	$is_fragment_case = isset( $this->context_node ) && 1 === $open_elements->count();

	return $is_fragment_case
		? $this->context_node
		: $open_elements->current_node();
}
/**
 * Reconstructs the active formatting elements.
 *
 * > This has the effect of reopening all the formatting elements that were opened
 * > in the current body, cell, or caption (whichever is youngest) that haven't
 * > been explicitly closed.
 *
 * Only the early exits of the algorithm are handled here: when there is
 * nothing to reconstruct this returns `false`, and in every other case the
 * processor bails because actual reconstruction is not supported.
 *
 * @since 6.4.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements
 *
 * @return bool Whether any formatting elements needed to be reconstructed.
 */
private function reconstruct_active_formatting_elements(): bool {
	$formatting_elements = $this->state->active_formatting_elements;

	/*
	 * > If there are no entries in the list of active formatting elements, then there is nothing
	 * > to reconstruct; stop this algorithm.
	 */
	if ( 0 === $formatting_elements->count() ) {
		return false;
	}

	$newest_entry = $formatting_elements->current_node();

	/*
	 * > If the last (most recently added) entry in the list of active formatting elements is a marker;
	 * > stop this algorithm.
	 */
	if ( 'marker' === $newest_entry->node_name ) {
		return false;
	}

	/*
	 * > If the last (most recently added) entry in the list of active formatting elements is an
	 * > element that is in the stack of open elements, then there is nothing to reconstruct;
	 * > stop this algorithm.
	 */
	if ( $this->state->stack_of_open_elements->contains_node( $newest_entry ) ) {
		return false;
	}

	$this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
}
/**
 * Runs the "reset the insertion mode appropriately" algorithm.
 *
 * Walks the stack of open elements from the current node upwards and
 * picks the insertion mode dictated by the first HTML element that the
 * specification assigns one to. When the walk reaches the first node in
 * the stack and a context node is set, the context node is examined in
 * its place. Elements outside of the HTML namespace are skipped.
 *
 * @since 6.7.0
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
 */
private function reset_insertion_mode_appropriately(): void {
	$open_elements = $this->state->stack_of_open_elements;

	// Remember the first node in the stack so that the upward walk can recognize it.
	$first_node = null;
	foreach ( $open_elements->walk_down() as $first_node ) {
		break;
	}

	/*
	 * Elements which select their insertion mode regardless of _last_.
	 *
	 * >  6. `tr` → "in row"
	 * >  7. `tbody`, `thead`, `tfoot` → "in table body"
	 * >  8. `caption` → "in caption"
	 * >  9. `colgroup` → "in column group"
	 * > 10. `table` → "in table"
	 * > 13. `body` → "in body"
	 * > 14. `frameset` → "in frameset" (fragment case)
	 */
	$modes_by_node_name = array(
		'TR'       => WP_HTML_Processor_State::INSERTION_MODE_IN_ROW,
		'TBODY'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY,
		'THEAD'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY,
		'TFOOT'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY,
		'CAPTION'  => WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION,
		'COLGROUP' => WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP,
		'TABLE'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'BODY'     => WP_HTML_Processor_State::INSERTION_MODE_IN_BODY,
		'FRAMESET' => WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET,
	);

	// > 1. Let _last_ be false.
	$last = false;

	// > 2. Let _node_ be the last node in the stack of open elements.
	foreach ( $open_elements->walk_up() as $node ) {
		/*
		 * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set
		 * >    _last_ to true, and, if the parser was created as part of the HTML fragment
		 * >    parsing algorithm (fragment case), set node to the context element.
		 */
		if ( $node === $first_node ) {
			$last = true;
			if ( isset( $this->context_node ) ) {
				$node = $this->context_node;
			}
		}

		// Every rule below matches HTML elements only.
		if ( 'html' !== $node->namespace ) {
			continue;
		}

		$node_name = $node->node_name;

		// Steps 6–10, 13, and 14: a direct mapping from element to insertion mode.
		if ( isset( $modes_by_node_name[ $node_name ] ) ) {
			$this->state->insertion_mode = $modes_by_node_name[ $node_name ];
			return;
		}

		/*
		 * > 4. If node is a `select` element: unless _last_ is true, look through its
		 * >    ancestors in the stack. Stop looking at a `template`; switch to
		 * >    "in select in table" at a `table`. Otherwise switch to "in select".
		 */
		if ( 'SELECT' === $node_name ) {
			$select_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;

			if ( ! $last ) {
				foreach ( $open_elements->walk_up( $node ) as $ancestor ) {
					if ( 'html' !== $ancestor->namespace ) {
						continue;
					}

					if ( 'TEMPLATE' === $ancestor->node_name ) {
						break;
					}

					if ( 'TABLE' === $ancestor->node_name ) {
						$select_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
						break;
					}
				}
			}

			$this->state->insertion_mode = $select_mode;
			return;
		}

		// > 5. If _node_ is a `td` or `th` element and _last_ is false, switch to "in cell".
		if ( ! $last && ( 'TD' === $node_name || 'TH' === $node_name ) ) {
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL;
			return;
		}

		// > 11. If _node_ is a `template` element, switch to the current template insertion mode.
		if ( 'TEMPLATE' === $node_name ) {
			$this->state->insertion_mode = end( $this->state->stack_of_template_insertion_modes );
			return;
		}

		// > 12. If _node_ is a `head` element and _last_ is false, switch to "in head".
		if ( ! $last && 'HEAD' === $node_name ) {
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
			return;
		}

		/*
		 * > 15. If _node_ is an `html` element: switch to "before head" when the head
		 * >     element pointer is null (fragment case), otherwise to "after head".
		 */
		if ( 'HTML' === $node_name ) {
			if ( isset( $this->state->head_element ) ) {
				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD;
			} else {
				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD;
			}
			return;
		}
	}

	/*
	 * > 16. If _last_ is true, then switch the insertion mode to "in body"
	 * >     and return. (fragment case)
	 *
	 * Reaching this point means that no node in the walk selected a mode.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
}
/**
 * Runs the adoption agency algorithm.
 *
 * Only the parts of the algorithm which pop elements or drop entries from
 * the list of active formatting elements are carried out here. The cases
 * that call for the "any other end tag" steps, or that have a furthest
 * block and would therefore need a common ancestor, end in a call to
 * `bail()` instead.
 *
 * The subject of the algorithm is the tag name of the currently-matched token.
 *
 * @since 6.4.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#adoption-agency-algorithm
 */
private function run_adoption_agency_algorithm(): void {
	// Upper bound on the number of passes through the loop below.
	$budget = 1000;
	$subject = $this->get_tag();
	$current_node = $this->state->stack_of_open_elements->current_node();
	if (
		// > If the current node is an HTML element whose tag name is subject
		$current_node && $subject === $current_node->node_name &&
		// > the current node is not in the list of active formatting elements
		! $this->state->active_formatting_elements->contains_node( $current_node )
	) {
		// Simple case: the element being closed is the current node, so popping it is all that is needed.
		$this->state->stack_of_open_elements->pop();
		return;
	}
	$outer_loop_counter = 0;
	/*
	 * NOTE(review): every path through the loop body ends in `return` or in a
	 * call to `bail()`. Presumably `bail()` throws, in which case the body runs
	 * at most once and the `bail()` after the loop is never reached — confirm.
	 */
	while ( $budget-- > 0 ) {
		// Return once the counter has reached eight; the counter is incremented on every pass.
		if ( $outer_loop_counter++ >= 8 ) {
			return;
		}
		/*
		 * > Let formatting element be the last element in the list of active formatting elements that:
		 * > - is between the end of the list and the last marker in the list,
		 * > if any, or the start of the list otherwise,
		 * > - and has the tag name subject.
		 */
		$formatting_element = null;
		foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
			// The search stops at the first marker met while walking up the list.
			if ( 'marker' === $item->node_name ) {
				break;
			}
			if ( $subject === $item->node_name ) {
				$formatting_element = $item;
				break;
			}
		}
		// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
		if ( null === $formatting_element ) {
			$this->bail( 'Cannot run adoption agency when "any other end tag" is required.' );
		}
		// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
		if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
			$this->state->active_formatting_elements->remove_node( $formatting_element );
			return;
		}
		// > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return.
		if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) {
			return;
		}
		/*
		 * > Let furthest block be the topmost node in the stack of open elements that is lower in the stack
		 * > than formatting element, and is an element in the special category. There might not be one.
		 */
		$is_above_formatting_element = true;
		$furthest_block = null;
		foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) {
			// Skip every node that comes before the formatting element, which is matched by bookmark name.
			if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) {
				continue;
			}
			// This is the formatting element itself: skip it and start examining the nodes after it.
			if ( $is_above_formatting_element ) {
				$is_above_formatting_element = false;
				continue;
			}
			// The first special element found after the formatting element is the furthest block.
			if ( self::is_special( $item ) ) {
				$furthest_block = $item;
				break;
			}
		}
		/*
		 * > If there is no furthest block, then the UA must first pop all the nodes from the bottom of the
		 * > stack of open elements, from the current node up to and including formatting element, then
		 * > remove formatting element from the list of active formatting elements, and finally return.
		 */
		if ( null === $furthest_block ) {
			// Note that the stack is popped while it is being walked: one pop for each node visited.
			foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
				$this->state->stack_of_open_elements->pop();
				if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
					$this->state->active_formatting_elements->remove_node( $formatting_element );
					return;
				}
			}
		}
		// The remaining steps of the algorithm are not implemented here.
		$this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' );
	}
	$this->bail( 'Cannot run adoption agency when looping required.' );
}
/**
 * Runs the "close the cell" algorithm.
 *
 * > Where the steps above say to close the cell, they mean to run the following algorithm:
 * >   1. Generate implied end tags.
 * >   2. If the current node is not now a td element or a th element, then this is a parse error.
 * >   3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack.
 * >   4. Clear the list of active formatting elements up to the last marker.
 * >   5. Switch the insertion mode to "in row".
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
 *
 * @since 6.7.0
 */
private function close_cell(): void {
	// Step 1.
	$this->generate_implied_end_tags();

	// Step 2. @todo Parse error if the current node is not a "td" or "th" element.

	// Step 3: one node is popped for each node visited, ending with the first TD or TH.
	$open_elements = $this->state->stack_of_open_elements;
	foreach ( $open_elements->walk_up() as $popped_node ) {
		$open_elements->pop();

		if ( in_array( $popped_node->node_name, array( 'TD', 'TH' ), true ) ) {
			break;
		}
	}

	// Step 4.
	$this->state->active_formatting_elements->clear_up_to_last_marker();

	// Step 5.
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
}
/**
 * Inserts an HTML element by pushing its token onto the stack of open elements.
 *
 * @since 6.4.0
 *
 * @see https://html.spec.whatwg.org/#insert-an-html-element
 *
 * @param WP_HTML_Token $token Token to push onto the stack of open elements.
 */
private function insert_html_element( WP_HTML_Token $token ): void {
	$open_elements = $this->state->stack_of_open_elements;
	$open_elements->push( $token );
}
/**
 * Inserts a foreign element on to the stack of open elements.
 *
 * The token takes on the namespace of the adjusted current node, or the
 * HTML namespace when there is none. It is also flagged as an integration
 * node when the integration-point checks for the current token say so.
 *
 * @since 6.7.0
 *
 * @see https://html.spec.whatwg.org/#insert-a-foreign-element
 *
 * @param WP_HTML_Token $token                     Insert this token. The token's namespace and
 *                                                 insertion point will be updated correctly.
 * @param bool          $only_add_to_element_stack Whether to skip the "insert an element at the adjusted
 *                                                 insertion location" algorithm when adding this element.
 *                                                 Currently has no effect; see the note in the method body.
 */
private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void {
	$adjusted_current_node = $this->get_adjusted_current_node();

	// The namespace is assigned before the integration-point checks below are made.
	if ( $adjusted_current_node ) {
		$token->namespace = $adjusted_current_node->namespace;
	} else {
		$token->namespace = 'html';
	}

	if ( $this->is_mathml_integration_point() ) {
		$token->integration_node_type = 'math';
	} elseif ( $this->is_html_integration_point() ) {
		$token->integration_node_type = 'html';
	}

	/*
	 * @todo When `$only_add_to_element_stack` is false, implement the "appropriate
	 *       place for inserting a node" and the "insert an element at the adjusted
	 *       insertion location" algorithms.
	 *
	 * These algorithms mostly impact DOM tree construction and not the HTML API.
	 * Here, there's no DOM node onto which the element will be appended, so the
	 * parser skips this step and the flag is not consulted.
	 *
	 * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location
	 */

	$this->insert_html_element( $token );
}
/**
 * Creates a virtual token and inserts it on the stack of open elements.
 *
 * The bookmark registered for the new token is a zero-length span which
 * starts where the current token's span starts.
 *
 * @since 6.7.0
 *
 * @param string      $token_name    Name of token to create and insert into the stack of open elements.
 * @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node.
 *                                   Defaults to auto-creating a bookmark name.
 * @return WP_HTML_Token Newly-created virtual token.
 */
private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
	$current_span = $this->bookmarks[ $this->state->current_token->bookmark_name ];

	if ( null === $bookmark_name ) {
		$bookmark_name = $this->bookmark_token();
	}

	$this->bookmarks[ $bookmark_name ] = new WP_HTML_Span( $current_span->start, 0 );

	$virtual_token = new WP_HTML_Token( $bookmark_name, $token_name, false );
	$this->insert_html_element( $virtual_token );

	return $virtual_token;
}
/*
* HTML Specification Helpers
*/
/**
 * Indicates if the current token is a MathML text integration point.
 *
 * That is the case for the `MI`, `MO`, `MN`, `MS`, and `MTEXT`
 * elements in the MathML namespace.
 *
 * @since 6.7.0
 *
 * @see https://html.spec.whatwg.org/#mathml-text-integration-point
 *
 * @return bool Whether the current token is a MathML integration point.
 */
private function is_mathml_integration_point(): bool {
	$current_token = $this->state->current_token;
	if ( ! isset( $current_token ) ) {
		return false;
	}

	if ( 'math' !== $current_token->namespace ) {
		return false;
	}

	// Every matching name starts with "M": rule out all other names with a single comparison.
	if ( 'M' !== $current_token->node_name[0] ) {
		return false;
	}

	return in_array(
		$current_token->node_name,
		array( 'MI', 'MO', 'MN', 'MS', 'MTEXT' ),
		true
	);
}
/**
 * Indicates if the current token is an HTML integration point.
 *
 * In the SVG namespace these are `DESC`, `FOREIGNOBJECT`, and `TITLE`.
 * In the MathML namespace it is `ANNOTATION-XML`, but only when its
 * `encoding` attribute is an ASCII case-insensitive match for
 * `text/html` or `application/xhtml+xml`.
 *
 * This has to be an instance method because the MathML case reads an
 * attribute of the currently-matched tag.
 *
 * @since 6.7.0
 *
 * @see https://html.spec.whatwg.org/#html-integration-point
 *
 * @return bool Whether the current token is an HTML integration point.
 */
private function is_html_integration_point(): bool {
	$current_token = $this->state->current_token;
	if ( ! isset( $current_token ) ) {
		return false;
	}

	$tag_name = $current_token->node_name;

	switch ( $current_token->namespace ) {
		case 'html':
			return false;

		case 'svg':
			return in_array( $tag_name, array( 'DESC', 'FOREIGNOBJECT', 'TITLE' ), true );

		case 'math':
			if ( 'ANNOTATION-XML' !== $tag_name ) {
				return false;
			}

			$encoding = $this->get_attribute( 'encoding' );
			if ( ! is_string( $encoding ) ) {
				return false;
			}

			return (
				0 === strcasecmp( $encoding, 'application/xhtml+xml' ) ||
				0 === strcasecmp( $encoding, 'text/html' )
			);
	}

	$this->bail( 'Should not have reached end of HTML Integration Point detection: check HTML API code.' );

	// This unnecessary return prevents tools from inaccurately reporting type errors.
	return false;
}
/**
 * Returns whether an element is in the HTML "special" category.
 *
 * A string argument is upper-cased and treated as the name of an element in
 * the HTML namespace. For a token in the HTML namespace its upper-cased node
 * name is used. For a token in any other namespace the lookup key is the
 * namespace and the node name as they are, joined by a single space.
 *
 * @since 6.4.0
 *
 * @see https://html.spec.whatwg.org/#special
 *
 * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace.
 * @return bool Whether the element of the given name is in the special category.
 */
public static function is_special( $tag_name ): bool {
	if ( is_string( $tag_name ) ) {
		$tag_name = strtoupper( $tag_name );
	} elseif ( 'html' === $tag_name->namespace ) {
		$tag_name = strtoupper( $tag_name->node_name );
	} else {
		$tag_name = "{$tag_name->namespace} {$tag_name->node_name}";
	}

	$special_elements = array(
		// HTML.
		'ADDRESS',
		'APPLET',
		'AREA',
		'ARTICLE',
		'ASIDE',
		'BASE',
		'BASEFONT',
		'BGSOUND',
		'BLOCKQUOTE',
		'BODY',
		'BR',
		'BUTTON',
		'CAPTION',
		'CENTER',
		'COL',
		'COLGROUP',
		'DD',
		'DETAILS',
		'DIR',
		'DIV',
		'DL',
		'DT',
		'EMBED',
		'FIELDSET',
		'FIGCAPTION',
		'FIGURE',
		'FOOTER',
		'FORM',
		'FRAME',
		'FRAMESET',
		'H1',
		'H2',
		'H3',
		'H4',
		'H5',
		'H6',
		'HEAD',
		'HEADER',
		'HGROUP',
		'HR',
		'HTML',
		'IFRAME',
		'IMG',
		'INPUT',
		'KEYGEN',
		'LI',
		'LINK',
		'LISTING',
		'MAIN',
		'MARQUEE',
		'MENU',
		'META',
		'NAV',
		'NOEMBED',
		'NOFRAMES',
		'NOSCRIPT',
		'OBJECT',
		'OL',
		'P',
		'PARAM',
		'PLAINTEXT',
		'PRE',
		'SCRIPT',
		'SEARCH',
		'SECTION',
		'SELECT',
		'SOURCE',
		'STYLE',
		'SUMMARY',
		'TABLE',
		'TBODY',
		'TD',
		'TEMPLATE',
		'TEXTAREA',
		'TFOOT',
		'TH',
		'THEAD',
		'TITLE',
		'TR',
		'TRACK',
		'UL',
		'WBR',
		'XMP',

		// MathML.
		'math MI',
		'math MO',
		'math MN',
		'math MS',
		'math MTEXT',
		'math ANNOTATION-XML',

		// SVG.
		'svg DESC',
		'svg FOREIGNOBJECT',
		'svg TITLE',
	);

	return in_array( $tag_name, $special_elements, true );
}
/**
 * Returns whether a given element is an HTML Void Element.
 *
 * > area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
 *
 * In addition to that list, `BASEFONT`, `BGSOUND`, `FRAME`, `KEYGEN`,
 * and `PARAM` are reported as void. The name is compared after being
 * upper-cased, so the check is case-insensitive.
 *
 * @since 6.4.0
 *
 * @see https://html.spec.whatwg.org/#void-elements
 *
 * @param string $tag_name Name of HTML tag to check.
 * @return bool Whether the given tag is an HTML Void Element.
 */
public static function is_void( $tag_name ): bool {
	$void_elements = array(
		'AREA',
		'BASE',
		'BASEFONT', // Obsolete but still treated as void.
		'BGSOUND',  // Obsolete but still treated as void.
		'BR',
		'COL',
		'EMBED',
		'FRAME',
		'HR',
		'IMG',
		'INPUT',
		'KEYGEN',   // Obsolete but still treated as void.
		'LINK',
		'META',
		'PARAM',    // Obsolete but still treated as void.
		'SOURCE',
		'TRACK',
		'WBR',
	);

	return in_array( strtoupper( $tag_name ), $void_elements, true );
}
/**
 * Gets an encoding from a given string.
 *
 * This is an algorithm defined in the WHAT-WG specification: leading and
 * trailing ASCII whitespace is removed from the label, and what remains
 * is matched ASCII case-insensitively against the known labels.
 *
 * Example:
 *
 *     'UTF-8' === self::get_encoding( 'utf8' );
 *     'UTF-8' === self::get_encoding( "  \tUTF-8 " );
 *     null    === self::get_encoding( 'UTF-7' );
 *     null    === self::get_encoding( 'utf8; charset=' );
 *
 * @see https://encoding.spec.whatwg.org/#concept-encoding-get
 *
 * @todo As this parser only supports UTF-8, only the UTF-8
 *       encodings are detected. Add more as desired, but the
 *       parser will bail on non-UTF-8 encodings.
 *
 * @since 6.7.0
 *
 * @param string $label A string which may specify a known encoding.
 * @return string|null Known encoding if matched, otherwise null.
 */
protected static function get_encoding( string $label ): ?string {
	// Every label which names the UTF-8 encoding, in lower case.
	$utf8_labels = array(
		'unicode-1-1-utf-8',
		'unicode11utf8',
		'unicode20utf8',
		'utf-8',
		'utf8',
		'x-unicode20utf8',
	);

	// > Remove any leading and trailing ASCII whitespace from label.
	$normalized_label = strtolower( trim( $label, " \t\f\r\n" ) );

	/*
	 * > If label is an ASCII case-insensitive match for any of the labels listed in the
	 * > table below, then return the corresponding encoding; otherwise return failure.
	 */
	return in_array( $normalized_label, $utf8_labels, true ) ? 'UTF-8' : null;
}
/*
* Constants that would pollute the top of the class if they were found there.
*/
/**
 * Indicates that the next HTML token should be parsed and processed.
 *
 * @since 6.4.0
 *
 * @var string
 */
const PROCESS_NEXT_NODE = 'process-next-node';
/**
 * Indicates that the current HTML token should be reprocessed in the newly-selected insertion mode.
 *
 * Passed to `step()` by the insertion-mode handlers after they have switched the insertion mode.
 *
 * @since 6.4.0
 *
 * @var string
 */
const REPROCESS_CURRENT_NODE = 'reprocess-current-node';
/**
 * Indicates that the current HTML token should be processed without advancing the parser.
 *
 * @since 6.5.0
 *
 * @var string
 */
const PROCESS_CURRENT_NODE = 'process-current-node';
/**
 * Indicates that the parser encountered unsupported markup and has bailed.
 *
 * @since 6.4.0
 *
 * @var string
 */
const ERROR_UNSUPPORTED = 'unsupported';
/**
 * Indicates that the parser encountered more HTML tokens than it
 * was able to process and has bailed.
 *
 * @since 6.4.0
 *
 * @var string
 */
const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
/**
 * Unlock code that must be passed into the constructor to create this class.
 *
 * This class extends the WP_HTML_Tag_Processor, which has a public class
 * constructor. Therefore, it's not possible to have a private constructor here.
 *
 * This unlock code is used to ensure that anyone calling the constructor is
 * doing so with a full understanding that it's intended to be a private API.
 *
 * @access private
 *
 * @var string
 */
const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::create_fragment() instead of calling the class constructor directly.';
}
|