# platform/api/symfony/Component/DomCrawler/Crawler.yaml
# Repository viewer timestamp: 2024-09-02 17:44:11 +00:00
---
# API description of the `Crawler` class; `class_comment` carries the
# class-level docblock text, one docblock line per line.
name: Crawler
class_comment: |-
  # * Crawler eases navigation of a list of \DOMNode objects.
  # *
  # * @author Fabien Potencier <fabien@symfony.com>
  # *
  # * @implements \IteratorAggregate<int, \DOMNode>
# Classes the Crawler description depends on; `source` is the
# fully-qualified class name.
dependencies:
- name: HTML5
  type: class
  source: 'Masterminds\HTML5'
- name: CssSelectorConverter
  type: class
  source: 'Symfony\Component\CssSelector\CssSelectorConverter'
# NOTE(review): no properties are recorded for this class, although the
# upstream Crawler class declares private properties (e.g. $nodes,
# $namespaces) — confirm whether private properties are skipped on purpose.
properties: []
methods:
# __construct(node = null, uri = null, baseHref = null, useHtml5Parser = true)
# `comment` holds only the constructor's own docblock line. The class
# docblock, class declaration and property declarations that had been
# captured into it belong to the class, not to this method.
- name: __construct
  visibility: public
  parameters:
  - name: node
    default: 'null'
  - name: uri
    default: 'null'
  - name: baseHref
    default: 'null'
  - name: useHtml5Parser
    default: 'true'
  comment: |-
    # * @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A Node to use as the base for the crawling
# getUri(): public, no parameters.
- name: getUri
  visibility: public
  parameters: []
  comment: '# * Returns the current URI.'
# getBaseHref(): public, no parameters.
- name: getBaseHref
  visibility: public
  parameters: []
  comment: '# * Returns base href.'
# clear(): public, no parameters.
- name: clear
  visibility: public
  parameters: []
  comment: '# * Removes all the nodes.'
# add(node): public; one parameter, no default recorded.
- name: add
  visibility: public
  parameters:
  - name: node
  comment: |-
    # * Adds a node to the current list of nodes.
    # *
    # * This method uses the appropriate specialized add*() method based
    # * on the type of the argument.
    # *
    # * @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A node
    # *
    # * @throws \InvalidArgumentException when node is not the expected type
# addContent(content, type = null): public.
- name: addContent
  visibility: public
  parameters:
  - name: content
  - name: type
    default: 'null'
  comment: |-
    # * Adds HTML/XML content.
    # *
    # * If the charset is not set via the content type, it is assumed to be UTF-8,
    # * or ISO-8859-1 as a fallback, which is the default charset defined by the
    # * HTTP 1.1 specification.
# addHtmlContent(content, charset = 'UTF-8'): public.
- name: addHtmlContent
  visibility: public
  parameters:
  - name: content
  - name: charset
    default: "'UTF-8'"
  comment: |-
    # * Adds an HTML content to the list of nodes.
    # *
    # * The libxml errors are disabled when the content is parsed.
    # *
    # * If you want to get parsing errors, be sure to enable
    # * internal errors via libxml_use_internal_errors(true)
    # * and then, get the errors via libxml_get_errors(). Be
    # * sure to clear errors with libxml_clear_errors() afterward.
# addXmlContent(content, charset = 'UTF-8', options = \LIBXML_NONET): public.
- name: addXmlContent
  visibility: public
  parameters:
  - name: content
  - name: charset
    default: "'UTF-8'"
  - name: options
    default: '\LIBXML_NONET'
  comment: |-
    # * Adds an XML content to the list of nodes.
    # *
    # * The libxml errors are disabled when the content is parsed.
    # *
    # * If you want to get parsing errors, be sure to enable
    # * internal errors via libxml_use_internal_errors(true)
    # * and then, get the errors via libxml_get_errors(). Be
    # * sure to clear errors with libxml_clear_errors() afterward.
    # *
    # * @param int $options Bitwise OR of the libxml option constants
    # * LIBXML_PARSEHUGE is dangerous, see
    # * http://symfony.com/blog/security-release-symfony-2-0-17-released
# addDocument(dom): public; one parameter, no default recorded.
- name: addDocument
  visibility: public
  parameters:
  - name: dom
  comment: |-
    # * Adds a \DOMDocument to the list of nodes.
    # *
    # * @param \DOMDocument $dom A \DOMDocument instance
# addNodeList(nodes): public; one parameter, no default recorded.
- name: addNodeList
  visibility: public
  parameters:
  - name: nodes
  comment: |-
    # * Adds a \DOMNodeList to the list of nodes.
    # *
    # * @param \DOMNodeList $nodes A \DOMNodeList instance
# addNodes(nodes): public; one parameter, no default recorded.
- name: addNodes
  visibility: public
  parameters:
  - name: nodes
  comment: |-
    # * Adds an array of \DOMNode instances to the list of nodes.
    # *
    # * @param \DOMNode[] $nodes An array of \DOMNode instances
# addNode(node): public; one parameter, no default recorded.
- name: addNode
  visibility: public
  parameters:
  - name: node
  comment: |-
    # * Adds a \DOMNode instance to the list of nodes.
    # *
    # * @param \DOMNode $node A \DOMNode instance
# eq(position): public; one parameter, no default recorded.
- name: eq
  visibility: public
  parameters:
  - name: position
  comment: '# * Returns a node given its position in the node list.'
# each(closure): public; one parameter, no default recorded.
- name: each
  visibility: public
  parameters:
  - name: closure
  comment: |-
    # * Calls an anonymous function on each node of the list.
    # *
    # * The anonymous function receives the position and the node wrapped
    # * in a Crawler instance as arguments.
    # *
    # * Example:
    # *
    # * $crawler->filter('h1')->each(function ($node, $i) {
    # * return $node->text();
    # * });
    # *
    # * @param \Closure $closure An anonymous function
    # *
    # * @return array An array of values returned by the anonymous function
# slice(offset = 0, length = null): public.
- name: slice
  visibility: public
  parameters:
  - name: offset
    default: '0'
  - name: length
    default: 'null'
  comment: '# * Slices the list of nodes by $offset and $length.'
# reduce(closure): public; one parameter, no default recorded.
- name: reduce
  visibility: public
  parameters:
  - name: closure
  comment: |-
    # * Reduces the list of nodes by calling an anonymous function.
    # *
    # * To remove a node from the list, the anonymous function must return false.
    # *
    # * @param \Closure $closure An anonymous function
# first(): public, no parameters.
- name: first
  visibility: public
  parameters: []
  comment: '# * Returns the first node of the current selection.'
# last(): public, no parameters.
- name: last
  visibility: public
  parameters: []
  comment: '# * Returns the last node of the current selection.'
# siblings(): public, no parameters.
- name: siblings
  visibility: public
  parameters: []
  comment: |-
    # * Returns the siblings nodes of the current selection.
    # *
    # * @throws \InvalidArgumentException When current node is empty
# matches(selector): public; no docblock text was recorded (comment is null).
- name: matches
  visibility: public
  parameters:
  - name: selector
  comment: null
# closest(selector): public; one parameter, no default recorded.
- name: closest
  visibility: public
  parameters:
  - name: selector
  comment: |-
    # * Return first parents (heading toward the document root) of the Element that matches the provided selector.
    # *
    # * @see https://developer.mozilla.org/en-US/docs/Web/API/Element/closest#Polyfill
    # *
    # * @throws \InvalidArgumentException When current node is empty
# nextAll(): public, no parameters.
- name: nextAll
  visibility: public
  parameters: []
  comment: |-
    # * Returns the next siblings nodes of the current selection.
    # *
    # * @throws \InvalidArgumentException When current node is empty
# previousAll(): public, no parameters.
- name: previousAll
  visibility: public
  parameters: []
  comment: |-
    # * Returns the previous sibling nodes of the current selection.
    # *
    # * @throws \InvalidArgumentException
# ancestors(): public, no parameters.
- name: ancestors
  visibility: public
  parameters: []
  comment: |-
    # * Returns the ancestors of the current selection.
    # *
    # * @throws \InvalidArgumentException When the current node is empty
# children(selector = null): public.
- name: children
  visibility: public
  parameters:
  - name: selector
    default: 'null'
  comment: |-
    # * Returns the children nodes of the current selection.
    # *
    # * @throws \InvalidArgumentException When current node is empty
    # * @throws \RuntimeException If the CssSelector Component is not available and $selector is provided
# attr(attribute, default = null): public.
- name: attr
  visibility: public
  parameters:
  - name: attribute
  - name: default
    default: 'null'
  comment: |-
    # * Returns the attribute value of the first node of the list.
    # *
    # * @param string|null $default When not null: the value to return when the node or attribute is empty
    # *
    # * @throws \InvalidArgumentException When current node is empty
# nodeName(): public, no parameters.
- name: nodeName
  visibility: public
  parameters: []
  comment: |-
    # * Returns the node name of the first node of the list.
    # *
    # * @throws \InvalidArgumentException When current node is empty
# text(default = null, normalizeWhitespace = true): public.
- name: text
  visibility: public
  parameters:
  - name: default
    default: 'null'
  - name: normalizeWhitespace
    default: 'true'
  comment: |-
    # * Returns the text of the first node of the list.
    # *
    # * Pass true as the second argument to normalize whitespaces.
    # *
    # * @param string|null $default When not null: the value to return when the current node is empty
    # * @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
    # *
    # * @throws \InvalidArgumentException When current node is empty
# innerText(normalizeWhitespace = true): public.
- name: innerText
  visibility: public
  parameters:
  - name: normalizeWhitespace
    default: 'true'
  comment: |-
    # * Returns only the inner text that is the direct descendent of the current node, excluding any child nodes.
    # *
    # * @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
# html(default = null): public.
- name: html
  visibility: public
  parameters:
  - name: default
    default: 'null'
  comment: |-
    # * Returns the first node of the list as HTML.
    # *
    # * @param string|null $default When not null: the value to return when the current node is empty
    # *
    # * @throws \InvalidArgumentException When current node is empty
# outerHtml(): public, no parameters; no docblock text was recorded.
- name: outerHtml
  visibility: public
  parameters: []
  comment: null
# evaluate(xpath): public; one parameter, no default recorded.
- name: evaluate
  visibility: public
  parameters:
  - name: xpath
  comment: |-
    # * Evaluates an XPath expression.
    # *
    # * Since an XPath expression might evaluate to either a simple type or a \DOMNodeList,
    # * this method will return either an array of simple types or a new Crawler instance.
# extract(attributes): public; one parameter, no default recorded.
- name: extract
  visibility: public
  parameters:
  - name: attributes
  comment: |-
    # * Extracts information from the list of nodes.
    # *
    # * You can extract attributes or/and the node value (_text).
    # *
    # * Example:
    # *
    # * $crawler->filter('h1 a')->extract(['_text', 'href']);
# filterXPath(xpath): public; one parameter, no default recorded.
- name: filterXPath
  visibility: public
  parameters:
  - name: xpath
  comment: |-
    # * Filters the list of nodes with an XPath expression.
    # *
    # * The XPath expression is evaluated in the context of the crawler, which
    # * is considered as a fake parent of the elements inside it.
    # * This means that a child selector "div" or "./div" will match only
    # * the div elements of the current crawler, not their children.
# filter(selector): public; one parameter, no default recorded.
- name: filter
  visibility: public
  parameters:
  - name: selector
  comment: |-
    # * Filters the list of nodes with a CSS selector.
    # *
    # * This method only works if you have installed the CssSelector Symfony Component.
    # *
    # * @throws \LogicException if the CssSelector Component is not available
# selectLink(value): public; one parameter, no default recorded.
- name: selectLink
  visibility: public
  parameters:
  - name: value
  comment: '# * Selects links by name or alt value for clickable images.'
# selectImage(value): public; one parameter, no default recorded.
- name: selectImage
  visibility: public
  parameters:
  - name: value
  comment: '# * Selects images by alt value.'
# selectButton(value): public; one parameter, no default recorded.
- name: selectButton
  visibility: public
  parameters:
  - name: value
  comment: '# * Selects a button by name or alt value for images.'
# link(method = 'get'): public.
- name: link
  visibility: public
  parameters:
  - name: method
    default: "'get'"
  comment: |-
    # * Returns a Link object for the first node in the list.
    # *
    # * @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
# links(): public, no parameters.
- name: links
  visibility: public
  parameters: []
  comment: |-
    # * Returns an array of Link objects for the nodes in the list.
    # *
    # * @return Link[]
    # *
    # * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances
# image(): public, no parameters.
- name: image
  visibility: public
  parameters: []
  comment: |-
    # * Returns an Image object for the first node in the list.
    # *
    # * @throws \InvalidArgumentException If the current node list is empty
# images(): public, no parameters.
- name: images
  visibility: public
  parameters: []
  comment: |-
    # * Returns an array of Image objects for the nodes in the list.
    # *
    # * @return Image[]
# form(values = null, method = null): public.
- name: form
  visibility: public
  parameters:
  - name: values
    default: 'null'
  - name: method
    default: 'null'
  comment: |-
    # * Returns a Form object for the first node in the list.
    # *
    # * @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
# setDefaultNamespacePrefix(prefix): public; one parameter, no default recorded.
- name: setDefaultNamespacePrefix
  visibility: public
  parameters:
  - name: prefix
  comment: |-
    # * Overloads a default namespace prefix to be used with XPath and CSS expressions.
# registerNamespace(prefix, namespace): public; no docblock text was recorded.
- name: registerNamespace
  visibility: public
  parameters:
  - name: prefix
  - name: namespace
  comment: null
# xpathLiteral(s): public; one parameter, no default recorded.
# The literal block keeps quotes and backslashes exactly as written, so no
# '' escaping is needed inside it.
- name: xpathLiteral
  visibility: public
  parameters:
  - name: s
  comment: |-
    # * Converts string for XPath expressions.
    # *
    # * Escaped characters are: quotes (") and apostrophe (').
    # *
    # * Examples:
    # *
    # * echo Crawler::xpathLiteral('foo " bar');
    # * //prints 'foo " bar'
    # *
    # * echo Crawler::xpathLiteral("foo ' bar");
    # * //prints "foo ' bar"
    # *
    # * echo Crawler::xpathLiteral('a\'b"c');
    # * //prints concat('a', "'", 'b"c')
# filterRelativeXPath(xpath): private; one parameter, no default recorded.
- name: filterRelativeXPath
  visibility: private
  parameters:
  - name: xpath
  comment: |-
    # * Filters the list of nodes with an XPath expression.
    # *
    # * The XPath expression should already be processed to apply it in the context of each node.
# relativize(xpath): private; one parameter, no default recorded.
- name: relativize
  visibility: private
  parameters:
  - name: xpath
  comment: |-
    # * Make the XPath relative to the current context.
    # *
    # * The returned XPath will match elements matching the XPath inside the current crawler
    # * when running in the context of a node of the crawler.
# getNode(position): public; no docblock text was recorded.
- name: getNode
  visibility: public
  parameters:
  - name: position
  comment: null
# count(): public, no parameters; no docblock text was recorded.
- name: count
  visibility: public
  parameters: []
  comment: null
# getIterator(): public, no parameters.
- name: getIterator
  visibility: public
  parameters: []
  comment: '# * @return \ArrayIterator<int, \DOMNode>'
# sibling(node, siblingDir = 'nextSibling'): protected; no docblock text
# was recorded.
- name: sibling
  visibility: protected
  parameters:
  - name: node
  - name: siblingDir
    default: "'nextSibling'"
  comment: null
# parseHtml5(htmlContent, charset = 'UTF-8'): private; no docblock text
# was recorded.
- name: parseHtml5
  visibility: private
  parameters:
  - name: htmlContent
  - name: charset
    default: "'UTF-8'"
  comment: null
# supportsEncoding(encoding): private; no docblock text was recorded.
- name: supportsEncoding
  visibility: private
  parameters:
  - name: encoding
  comment: null
# parseXhtml(htmlContent, charset = 'UTF-8'): private; no docblock text
# was recorded.
- name: parseXhtml
  visibility: private
  parameters:
  - name: htmlContent
  - name: charset
    default: "'UTF-8'"
  comment: null
# convertToHtmlEntities(htmlContent, charset = 'UTF-8'): private.
- name: convertToHtmlEntities
  visibility: private
  parameters:
  - name: htmlContent
  - name: charset
    default: "'UTF-8'"
  comment: '# * Converts charset to HTML-entities to ensure valid parsing.'
# createDOMXPath(document, prefixes = []): private.
- name: createDOMXPath
  visibility: private
  parameters:
  - name: document
  - name: prefixes
    default: '[]'
  comment: '# * @throws \InvalidArgumentException'
# discoverNamespace(domxpath, prefix): private.
- name: discoverNamespace
  visibility: private
  parameters:
  - name: domxpath
  - name: prefix
  comment: '# * @throws \InvalidArgumentException'
# findNamespacePrefixes(xpath): private; no docblock text was recorded.
- name: findNamespacePrefixes
  visibility: private
  parameters:
  - name: xpath
  comment: null
# createSubCrawler(nodes): private; one parameter, no default recorded.
- name: createSubCrawler
  visibility: private
  parameters:
  - name: nodes
  comment: |-
    # * Creates a crawler for some subnodes.
    # *
    # * @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $nodes
# createCssSelectorConverter(): private, no parameters.
- name: createCssSelectorConverter
  visibility: private
  parameters: []
  comment: '# * @throws \LogicException If the CssSelector Component is not available'
# parseHtmlString(content, charset): private; no defaults recorded.
- name: parseHtmlString
  visibility: private
  parameters:
  - name: content
  - name: charset
  comment: |-
    # * Parse string into DOMDocument object using HTML5 parser if the content is HTML5 and the library is available.
    # * Use libxml parser otherwise.
# canParseHtml5String(content): private; no docblock text was recorded.
- name: canParseHtml5String
  visibility: private
  parameters:
  - name: content
  comment: null
# isValidHtml5Heading(heading): private; no docblock text was recorded.
- name: isValidHtml5Heading
  visibility: private
  parameters:
  - name: heading
  comment: null
# normalizeWhitespace(string): private; no docblock text was recorded.
- name: normalizeWhitespace
  visibility: private
  parameters:
  - name: string
  comment: null
# No traits are listed: Masterminds\HTML5 and
# Symfony\Component\CssSelector\CssSelectorConverter are imported classes
# (both already appear under `dependencies` with `type: class`), not traits.
# NOTE(review): confirm against the upstream class body that it contains no
# trait `use` statement.
traits: []
# Interfaces recorded for the Crawler class.
interfaces:
- '\IteratorAggregate'
- '\Countable'