-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy pathxml_find_all.Rd
123 lines (103 loc) · 4.27 KB
/
xml_find_all.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xml_find.R
\name{xml_find_all}
\alias{xml_find_all}
\alias{xml_find_all.xml_nodeset}
\alias{xml_find_first}
\alias{xml_find_num}
\alias{xml_find_int}
\alias{xml_find_chr}
\alias{xml_find_lgl}
\alias{xml_find_one}
\title{Find nodes that match an xpath expression.}
\usage{
xml_find_all(x, xpath, ns = xml_ns(x), ...)
\method{xml_find_all}{xml_nodeset}(x, xpath, ns = xml_ns(x), flatten = TRUE, ...)
xml_find_first(x, xpath, ns = xml_ns(x))
xml_find_num(x, xpath, ns = xml_ns(x))
xml_find_int(x, xpath, ns = xml_ns(x))
xml_find_chr(x, xpath, ns = xml_ns(x))
xml_find_lgl(x, xpath, ns = xml_ns(x))
}
\arguments{
\item{x}{A document, node, or node set.}
\item{xpath}{A string containing an xpath (1.0) expression.}
\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
qualified with the ns prefix, i.e. if the element \code{bar} is defined
in namespace \code{foo}, it will be called \code{foo:bar}. (And
similarly for attributes). Default namespaces must be given an explicit
name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
\code{\link[=xml_set_name]{xml_set_name()}}.}
\item{...}{Further arguments passed to or from other methods.}
\item{flatten}{A logical indicating whether to return a single, flattened
nodeset or a list of nodesets.}
}
\value{
\code{xml_find_all} returns a nodeset if applied to a node, and a nodeset
or a list of nodesets if applied to a nodeset. If there are no matches,
the nodeset(s) will be empty. Within each nodeset, the result will always
be unique; repeated nodes are automatically de-duplicated.
\code{xml_find_first} returns a node if applied to a node, and a nodeset
if applied to a nodeset. The output is \emph{always} the same size as
the input. If there are no matches, \code{xml_find_first} will return a
missing node; if there are multiple matches, it will return the first
only.
\code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return
numeric, character and logical results respectively.
}
\description{
Xpath is like regular expressions for trees - it's worth learning if
you're trying to extract nodes from arbitrary locations in a document.
Use \code{xml_find_all} to find all matches - if there's no match you'll
get an empty result. Use \code{xml_find_first} to find a specific match -
if there's no match you'll get an \code{xml_missing} node.
}
\section{Deprecated functions}{
\code{xml_find_one()} has been deprecated. Instead use
\code{xml_find_first()}.
}
\examples{
x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
xml_find_all(x, ".//baz")
xml_path(xml_find_all(x, ".//baz"))
# Note the difference between .// and //
# // finds anywhere in the document (ignoring the current node)
# .// finds anywhere beneath the current node
(bar <- xml_find_all(x, ".//bar"))
xml_find_all(bar, ".//baz")
xml_find_all(bar, "//baz")
# Find all vs find one -----------------------------------------------------
x <- read_xml("<body>
<p>Some <b>text</b>.</p>
<p>Some <b>other</b> <b>text</b>.</p>
<p>No bold here!</p>
</body>")
para <- xml_find_all(x, ".//p")
# By default, if you apply xml_find_all to a nodeset, it finds all matches,
# de-duplicates them, and returns as a single nodeset. This means you
# never know how many results you'll get
xml_find_all(para, ".//b")
# If you set flatten to FALSE, though, xml_find_all will return a list of
# nodesets, where each nodeset contains the matches for the corresponding
# node in the original nodeset.
xml_find_all(para, ".//b", flatten = FALSE)
# xml_find_first only returns the first match per input node. If there are 0
# matches it will return a missing node
xml_find_first(para, ".//b")
xml_text(xml_find_first(para, ".//b"))
# Namespaces ---------------------------------------------------------------
# If the document uses namespaces, you'll need use xml_ns to form
# a unique mapping between full namespace url and a short prefix
x <- read_xml('
<root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com">
<f:doc><g:baz /></f:doc>
<f:doc><g:baz /></f:doc>
</root>
')
xml_find_all(x, ".//f:doc")
xml_find_all(x, ".//f:doc", xml_ns(x))
}
\seealso{
\code{\link[=xml_ns_strip]{xml_ns_strip()}} to remove the default namespaces
}