@wordpress/block-serialization-default-parser
Advanced tools
| <?php | ||
| /** | ||
| * Block Serialization Parser | ||
| * | ||
| * @package WordPress | ||
| */ | ||
| /** | ||
| * Class WP_Block_Parser_Block | ||
| * | ||
| * Holds the block structure in memory | ||
| * | ||
| * @since 5.0.0 | ||
| */ | ||
class WP_Block_Parser_Block {
	/**
	 * Fully-qualified block name, or null for freeform HTML.
	 *
	 * @example "core/paragraph"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $blockName; // phpcs:ignore WordPress.NamingConventions.ValidVariableName

	/**
	 * Attributes decoded from the block comment delimiter, if any.
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 5.0.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * Blocks nested inside this one (same structure as this class).
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks; // phpcs:ignore WordPress.NamingConventions.ValidVariableName

	/**
	 * HTML found between the block delimiters once the inner blocks
	 * have been taken out.
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $innerHTML; // phpcs:ignore WordPress.NamingConventions.ValidVariableName

	/**
	 * HTML fragments interleaved with null placeholders; each null marks
	 * the position of one inner block.
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent; // phpcs:ignore WordPress.NamingConventions.ValidVariableName

	/**
	 * Constructor.
	 *
	 * Stores each argument on the matching public property. The property
	 * declaration order above is what determines key order when an instance
	 * is cast to an array, so it must not be rearranged.
	 *
	 * @since 5.0.0
	 *
	 * @param string $name          Name of block.
	 * @param array  $attrs         Optional set of attributes from block comment delimiters.
	 * @param array  $inner_blocks  List of inner blocks (of this same class).
	 * @param string $inner_html    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $inner_content List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $inner_blocks, $inner_html, $inner_content ) {
		// Identity of the block.
		$this->blockName = $name; // phpcs:ignore WordPress.NamingConventions.ValidVariableName
		$this->attrs     = $attrs;

		// Content carried by the block.
		$this->innerHTML    = $inner_html; // phpcs:ignore WordPress.NamingConventions.ValidVariableName
		$this->innerContent = $inner_content; // phpcs:ignore WordPress.NamingConventions.ValidVariableName
		$this->innerBlocks  = $inner_blocks; // phpcs:ignore WordPress.NamingConventions.ValidVariableName
	}
}
| <?php | ||
| /** | ||
| * Block Serialization Parser | ||
| * | ||
| * @package WordPress | ||
| */ | ||
| /** | ||
| * Class WP_Block_Parser_Frame | ||
| * | ||
| * Holds partial blocks in memory while parsing | ||
| * | ||
| * @internal | ||
| * @since 5.0.0 | ||
| */ | ||
class WP_Block_Parser_Frame {
	/**
	 * The block (possibly still being assembled) tracked by this frame.
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset in the document at which the parse token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Length in bytes of the whole parse token.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset in the document just past the end of the parse token
	 * (used when the stack is folded back into parse output).
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset in the document where HTML preceding the token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $leading_html_start;

	/**
	 * Constructor
	 *
	 * Copies the arguments onto the frame. When no previous offset is
	 * supplied it defaults to the first byte after the token.
	 *
	 * @since 5.0.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		// Only null triggers the default; an explicit 0 is a valid offset.
		if ( null === $prev_offset ) {
			$prev_offset = $token_start + $token_length;
		}

		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = $prev_offset;
		$this->leading_html_start = $leading_html_start;
	}
}
| <?php | ||
| /** | ||
| * Block Serialization Parser | ||
| * | ||
| * @package WordPress | ||
| */ | ||
| /** | ||
| * Class WP_Block_Parser | ||
| * | ||
| * Parses a document and constructs a list of parsed block objects | ||
| * | ||
| * @since 5.0.0 | ||
| * @since 4.0.0 returns arrays not objects, all attributes are arrays | ||
| */ | ||
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * Each entry is a WP_Block_Parser_Block cast to an array.
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		// Reset all parser state so one instance can be reused across documents.
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		// proceed() consumes one token per call and returns false once the document is exhausted.
		while ( $this->proceed() ) {
			continue;
		}

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when more tokens may follow, false when parsing is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 */

				// for the easy case we'll assume an implicit closer.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack();
					return false;
				}

				/*
				 * for the nested case where it's more difficult we'll
				 * have to assume that multiple closers are missing
				 * and so we'll collapse the whole stack piecewise
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 */
				$stack_top = array_pop( $this->stack );
				$html      = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );

				// The trailing fragment is recorded unconditionally here, even when it is an empty string.
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned tuple is
	 * array( $token_type, $block_name, $attrs, $start_offset, $token_length )
	 * where $token_type is one of 'no-more-tokens', 'void-block',
	 * 'block-opener' or 'block-closer'. Every other slot is null for
	 * 'no-more-tokens', and $attrs is null for 'block-closer'.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * false: the PCRE engine failed (probably catastrophic backtracking
		 * or out-of-memory); 0: there are simply no more tokens.
		 * Both end tokenization the same way.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE an unmatched optional group reports offset -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 *
		 * Note that json_decode() yields null when the captured text is not valid JSON.
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * A closer that is also void or carries attributes isn't allowed.
		 * This is an error, but we can ignore the extras since they don't
		 * hurt anything: the token is classified by the checks below.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $inner_html HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $inner_html ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $inner_html, array( $inner_html ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output; defaults to the rest of the document.
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		// Nothing left between the current offset and the end of the document.
		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * The block is appended to the innerBlocks of the frame on top of the
	 * stack, together with any HTML found between that frame's previous
	 * offset and the start of the block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string explicitly: empty() also treats
		 * the string "0" as empty, which would silently drop a fragment
		 * consisting of just "0". The is_string() guard covers substr()
		 * returning false on PHP versions before 8.0.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		// null marks the position of the inner block within the content list.
		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any leading HTML recorded on the frame is emitted first as a
	 * freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		// Without an end offset the block implicitly runs to the end of the document.
		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Explicit empty-string comparison rather than empty(), so that a
		 * fragment of just "0" is kept; is_string() covers substr()
		 * returning false on PHP versions before 8.0.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
| /** | ||
| * WP_Block_Parser_Block class. | ||
| * | ||
| * Required for backward compatibility in WordPress Core. | ||
| */ | ||
| require_once __DIR__ . '/class-wp-block-parser-block.php'; | ||
| /** | ||
| * WP_Block_Parser_Frame class. | ||
| * | ||
| * Required for backward compatibility in WordPress Core. | ||
| */ | ||
| require_once __DIR__ . '/class-wp-block-parser-frame.php'; |
+2
-2
| { | ||
| "name": "@wordpress/block-serialization-default-parser", | ||
| "version": "4.35.0", | ||
| "version": "4.35.1", | ||
| "description": "Block serialization specification parser for WordPress posts.", | ||
@@ -36,3 +36,3 @@ "author": "The WordPress Contributors", | ||
| }, | ||
| "gitHead": "a92f606309b1541b834ff9b0a76ed2a466fc45ed" | ||
| "gitHead": "ce5639111c30763dbdf07f40eeb136ea6030ecf1" | ||
| } |
+4
-548
@@ -8,549 +8,5 @@ <?php | ||
| /** | ||
| * Class WP_Block_Parser_Block | ||
| * | ||
| * Holds the block structure in memory | ||
| * | ||
| * @since 5.0.0 | ||
| */ | ||
class WP_Block_Parser_Block {
	/**
	 * Fully-qualified block name, or null for freeform HTML.
	 *
	 * @example "core/paragraph"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $blockName;

	/**
	 * Attributes decoded from the block comment delimiter, if any.
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 5.0.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * Blocks nested inside this one (same structure as this class).
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks;

	/**
	 * HTML found between the block delimiters once the inner blocks
	 * have been taken out.
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * HTML fragments interleaved with null placeholders; each null marks
	 * the position of one inner block.
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Stores each argument on the public property of the same meaning.
	 * The property declaration order above determines key order when an
	 * instance is cast to an array, so it must not be rearranged.
	 *
	 * @since 5.0.0
	 *
	 * @param string $name         Name of block.
	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array  $innerBlocks  List of inner blocks (of this same class).
	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		// Identity of the block.
		$this->blockName = $name;
		$this->attrs     = $attrs;

		// Content carried by the block.
		$this->innerHTML    = $innerHTML;
		$this->innerContent = $innerContent;
		$this->innerBlocks  = $innerBlocks;
	}
}
| /** | ||
| * Class WP_Block_Parser_Frame | ||
| * | ||
| * Holds partial blocks in memory while parsing | ||
| * | ||
| * @internal | ||
| * @since 5.0.0 | ||
| */ | ||
class WP_Block_Parser_Frame {
	/**
	 * The block (possibly still being assembled) tracked by this frame.
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset in the document at which the parse token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Length in bytes of the whole parse token.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset in the document just past the end of the parse token
	 * (used when the stack is folded back into parse output).
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset in the document where HTML preceding the token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $leading_html_start;

	/**
	 * Constructor
	 *
	 * Copies the arguments onto the frame. When no previous offset is
	 * supplied it defaults to the first byte after the token.
	 *
	 * @since 5.0.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		// Only null triggers the default; an explicit 0 is a valid offset.
		if ( null === $prev_offset ) {
			$prev_offset = $token_start + $token_length;
		}

		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = $prev_offset;
		$this->leading_html_start = $leading_html_start;
	}
}
| /** | ||
| * Class WP_Block_Parser | ||
| * | ||
| * Parses a document and constructs a list of parsed block objects | ||
| * | ||
| * @since 5.0.0 | ||
| * @since 4.0.0 returns arrays not objects, all attributes are arrays | ||
| */ | ||
| class WP_Block_Parser { | ||
| /** | ||
| * Input document being parsed | ||
| * | ||
| * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->" | ||
| * | ||
| * @since 5.0.0 | ||
| * @var string | ||
| */ | ||
| public $document; | ||
| /** | ||
| * Tracks parsing progress through document | ||
| * | ||
| * @since 5.0.0 | ||
| * @var int | ||
| */ | ||
| public $offset; | ||
| /** | ||
| * List of parsed blocks | ||
| * | ||
| * @since 5.0.0 | ||
| * @var WP_Block_Parser_Block[] | ||
| */ | ||
| public $output; | ||
| /** | ||
| * Stack of partially-parsed structures in memory during parse | ||
| * | ||
| * @since 5.0.0 | ||
| * @var WP_Block_Parser_Frame[] | ||
| */ | ||
| public $stack; | ||
| /** | ||
| * Empty associative array, here due to PHP quirks | ||
| * | ||
| * @since 4.4.0 | ||
| * @var array empty associative array | ||
| */ | ||
| public $empty_attrs; | ||
| /** | ||
| * Parses a document and returns a list of block structures | ||
| * | ||
| * When encountering an invalid parse will return a best-effort | ||
| * parse. In contrast to the specification parser this does not | ||
| * return an error on invalid inputs. | ||
| * | ||
| * @since 5.0.0 | ||
| * | ||
| * @param string $document Input document being parsed. | ||
| * @return array[] | ||
| */ | ||
| public function parse( $document ) { | ||
| $this->document = $document; | ||
| $this->offset = 0; | ||
| $this->output = array(); | ||
| $this->stack = array(); | ||
| $this->empty_attrs = json_decode( '{}', true ); | ||
| while ( $this->proceed() ) { | ||
| continue; | ||
| } | ||
| return $this->output; | ||
| } | ||
| /** | ||
| * Processes the next token from the input document | ||
| * and returns whether to proceed eating more tokens | ||
| * | ||
| * This is the "next step" function that essentially | ||
| * takes a token as its input and decides what to do | ||
| * with that token before descending deeper into a | ||
| * nested block tree or continuing along the document | ||
| * or breaking out of a level of nesting. | ||
| * | ||
| * @internal | ||
| * @since 5.0.0 | ||
| * @return bool | ||
| */ | ||
| public function proceed() { | ||
| $next_token = $this->next_token(); | ||
| list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token; | ||
| $stack_depth = count( $this->stack ); | ||
| // we may have some HTML soup before the next block. | ||
| $leading_html_start = $start_offset > $this->offset ? $this->offset : null; | ||
| switch ( $token_type ) { | ||
| case 'no-more-tokens': | ||
| // if not in a block then flush output. | ||
| if ( 0 === $stack_depth ) { | ||
| $this->add_freeform(); | ||
| return false; | ||
| } | ||
| /* | ||
| * Otherwise we have a problem | ||
| * This is an error | ||
| * | ||
| * we have options | ||
| * - treat it all as freeform text | ||
| * - assume an implicit closer (easiest when not nesting) | ||
| */ | ||
| // for the easy case we'll assume an implicit closer. | ||
| if ( 1 === $stack_depth ) { | ||
| $this->add_block_from_stack(); | ||
| return false; | ||
| } | ||
| /* | ||
| * for the nested case where it's more difficult we'll | ||
| * have to assume that multiple closers are missing | ||
| * and so we'll collapse the whole stack piecewise | ||
| */ | ||
| while ( 0 < count( $this->stack ) ) { | ||
| $this->add_block_from_stack(); | ||
| } | ||
| return false; | ||
| case 'void-block': | ||
| /* | ||
| * easy case is if we stumbled upon a void block | ||
| * in the top-level of the document | ||
| */ | ||
| if ( 0 === $stack_depth ) { | ||
| if ( isset( $leading_html_start ) ) { | ||
| $this->output[] = (array) $this->freeform( | ||
| substr( | ||
| $this->document, | ||
| $leading_html_start, | ||
| $start_offset - $leading_html_start | ||
| ) | ||
| ); | ||
| } | ||
| $this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ); | ||
| $this->offset = $start_offset + $token_length; | ||
| return true; | ||
| } | ||
| // otherwise we found an inner block. | ||
| $this->add_inner_block( | ||
| new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ), | ||
| $start_offset, | ||
| $token_length | ||
| ); | ||
| $this->offset = $start_offset + $token_length; | ||
| return true; | ||
| case 'block-opener': | ||
| // track all newly-opened blocks on the stack. | ||
| array_push( | ||
| $this->stack, | ||
| new WP_Block_Parser_Frame( | ||
| new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ), | ||
| $start_offset, | ||
| $token_length, | ||
| $start_offset + $token_length, | ||
| $leading_html_start | ||
| ) | ||
| ); | ||
| $this->offset = $start_offset + $token_length; | ||
| return true; | ||
| case 'block-closer': | ||
| /* | ||
| * if we're missing an opener we're in trouble | ||
| * This is an error | ||
| */ | ||
| if ( 0 === $stack_depth ) { | ||
| /* | ||
| * we have options | ||
| * - assume an implicit opener | ||
| * - assume _this_ is the opener | ||
| * - give up and close out the document | ||
| */ | ||
| $this->add_freeform(); | ||
| return false; | ||
| } | ||
| // if we're not nesting then this is easy - close the block. | ||
| if ( 1 === $stack_depth ) { | ||
| $this->add_block_from_stack( $start_offset ); | ||
| $this->offset = $start_offset + $token_length; | ||
| return true; | ||
| } | ||
| /* | ||
| * otherwise we're nested and we have to close out the current | ||
| * block and add it as a new innerBlock to the parent | ||
| */ | ||
| $stack_top = array_pop( $this->stack ); | ||
| $html = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset ); | ||
| $stack_top->block->innerHTML .= $html; | ||
| $stack_top->block->innerContent[] = $html; | ||
| $stack_top->prev_offset = $start_offset + $token_length; | ||
| $this->add_inner_block( | ||
| $stack_top->block, | ||
| $stack_top->token_start, | ||
| $stack_top->token_length, | ||
| $start_offset + $token_length | ||
| ); | ||
| $this->offset = $start_offset + $token_length; | ||
| return true; | ||
| default: | ||
| // This is an error. | ||
| $this->add_freeform(); | ||
| return false; | ||
| } | ||
| } | ||
| /** | ||
| * Scans the document from where we last left off | ||
| * and finds the next valid token to parse if it exists | ||
| * | ||
| * Returns the type of the find: kind of find, block information, attributes | ||
| * | ||
| * @internal | ||
| * @since 5.0.0 | ||
| * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments | ||
| * @return array | ||
| */ | ||
| public function next_token() { | ||
| $matches = null; | ||
| /* | ||
| * aye the magic | ||
| * we're using a single RegExp to tokenize the block comment delimiters | ||
| * we're also using a trick here because the only difference between a | ||
| * block opener and a block closer is the leading `/` before `wp:` (and | ||
| * a closer has no attributes). we can trap them both and process the | ||
| * match back in PHP to see which one it was. | ||
| */ | ||
| $has_match = preg_match( | ||
| '/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s', | ||
| $this->document, | ||
| $matches, | ||
| PREG_OFFSET_CAPTURE, | ||
| $this->offset | ||
| ); | ||
| // if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE. | ||
| if ( false === $has_match ) { | ||
| return array( 'no-more-tokens', null, null, null, null ); | ||
| } | ||
| // we have no more tokens. | ||
| if ( 0 === $has_match ) { | ||
| return array( 'no-more-tokens', null, null, null, null ); | ||
| } | ||
| list( $match, $started_at ) = $matches[0]; | ||
| $length = strlen( $match ); | ||
| $is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1]; | ||
| $is_void = isset( $matches['void'] ) && -1 !== $matches['void'][1]; | ||
| $namespace = $matches['namespace']; | ||
| $namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/'; | ||
| $name = $namespace . $matches['name'][0]; | ||
| $has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1]; | ||
| /* | ||
| * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays | ||
| * are associative arrays. If we use `array()` we get a JSON `[]` | ||
| */ | ||
| $attrs = $has_attrs | ||
| ? json_decode( $matches['attrs'][0], /* as-associative */ true ) | ||
| : $this->empty_attrs; | ||
| /* | ||
| * This state isn't allowed | ||
| * This is an error | ||
| */ | ||
| if ( $is_closer && ( $is_void || $has_attrs ) ) { | ||
| // we can ignore them since they don't hurt anything. | ||
| } | ||
| if ( $is_void ) { | ||
| return array( 'void-block', $name, $attrs, $started_at, $length ); | ||
| } | ||
| if ( $is_closer ) { | ||
| return array( 'block-closer', $name, null, $started_at, $length ); | ||
| } | ||
| return array( 'block-opener', $name, $attrs, $started_at, $length ); | ||
| } | ||
/**
 * Builds a block object representing freeform (non-block) HTML.
 *
 * The block is constructed with a null name, the parser's shared
 * empty-attributes value, an empty list of inner blocks, the given HTML,
 * and a one-element list holding that same HTML.
 *
 * @internal
 * @since 3.9.0
 *
 * @param string $innerHTML HTML content of block.
 * @return WP_Block_Parser_Block freeform block object.
 */
public function freeform( $innerHTML ) {
	$no_inner_blocks = array();
	$content_chunks  = array( $innerHTML );

	return new WP_Block_Parser_Block(
		null,
		$this->empty_attrs,
		$no_inner_blocks,
		$innerHTML,
		$content_chunks
	);
}
/**
 * Pushes a length of text from the input document
 * to the output list as a freeform block.
 *
 * The text is read starting at the parser's current offset. When no
 * length is supplied, everything from the current offset to the end of
 * the document is used. Nothing is pushed when the length is zero.
 *
 * @internal
 * @since 5.0.0
 * @param int|null $length how many bytes of document text to output,
 *                         or null to output the remainder of the document.
 */
public function add_freeform( $length = null ) {
	/*
	 * Only a missing length falls back to "the rest of the document".
	 * A truthiness test here would also catch an explicit 0 and emit the
	 * whole remainder when the caller asked for zero bytes.
	 */
	if ( null === $length ) {
		$length = strlen( $this->document ) - $this->offset;
	}

	if ( 0 === $length ) {
		return;
	}

	$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
}
/**
 * Given a block structure from memory, appends it as an inner block
 * of the block currently on top of the parsing stack.
 *
 * Any document text between the parent's previous offset and the start
 * of this block's first token is appended to the parent's innerHTML and
 * innerContent; a null marker is then added to innerContent to record
 * where the inner block sits. Finally the parent's previous offset is
 * advanced past the inner block.
 *
 * @internal
 * @since 5.0.0
 * @param WP_Block_Parser_Block $block        The block to add to the output.
 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
 */
public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
	$parent                       = $this->stack[ count( $this->stack ) - 1 ];
	$parent->block->innerBlocks[] = (array) $block;

	$html = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

	/*
	 * Compare against the empty string rather than using empty():
	 * empty( '0' ) is true in PHP, so a fragment consisting solely of
	 * the character "0" would otherwise be silently dropped. The
	 * is_string() guard covers substr() returning false on older PHP.
	 */
	if ( is_string( $html ) && '' !== $html ) {
		$parent->block->innerHTML     .= $html;
		$parent->block->innerContent[] = $html;
	}

	// Null marks the position of the inner block within the content list.
	$parent->block->innerContent[] = null;
	$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
}
/**
 * Pushes the top block from the parsing stack to the output list.
 *
 * The top frame is popped off the stack. Document text from the frame's
 * previous offset up to $end_offset (or to the end of the document when
 * no end offset is given) is appended to the block's innerHTML and
 * innerContent. If the frame recorded a leading HTML start, the text
 * between that position and the frame's token start is output first as
 * a freeform block. The block itself is then output, cast to an array.
 *
 * @internal
 * @since 5.0.0
 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
 */
public function add_block_from_stack( $end_offset = null ) {
	$stack_top   = array_pop( $this->stack );
	$prev_offset = $stack_top->prev_offset;

	$html = isset( $end_offset )
		? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
		: substr( $this->document, $prev_offset );

	/*
	 * Compare against the empty string rather than using empty():
	 * empty( '0' ) is true in PHP, so trailing content consisting solely
	 * of the character "0" would otherwise be silently dropped. The
	 * is_string() guard covers substr() returning false on older PHP.
	 */
	if ( is_string( $html ) && '' !== $html ) {
		$stack_top->block->innerHTML     .= $html;
		$stack_top->block->innerContent[] = $html;
	}

	// Emit any HTML that preceded the block's opening token before the block itself.
	if ( isset( $stack_top->leading_html_start ) ) {
		$this->output[] = (array) $this->freeform(
			substr(
				$this->document,
				$stack_top->leading_html_start,
				$stack_top->token_start - $stack_top->leading_html_start
			)
		);
	}

	$this->output[] = (array) $stack_top->block;
}
| } | ||
| // Require files. | ||
| require_once __DIR__ . '/class-wp-block-parser-block.php'; | ||
| require_once __DIR__ . '/class-wp-block-parser-frame.php'; | ||
| require_once __DIR__ . '/class-wp-block-parser.php'; |
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
160823
0.78%19
18.75%