## ----echo=FALSE---------------------------------------------------------------
# Vignette-wide knitr chunk defaults: collapse each chunk's source and output
# into a single block, and prefix every output line with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
## -----------------------------------------------------------------------------
library(rvest)
## -----------------------------------------------------------------------------
# Download and parse the rvest homepage. The https:// scheme is used so the
# page is not requested over a cleartext connection.
html <- read_html("https://rvest.tidyverse.org/")
# Show the class vector of the parsed document object.
class(html)
## -----------------------------------------------------------------------------
# Build a small in-memory document from an inline string (no network access),
# then display the parsed result.
html <- minimal_html("\nThis is a paragraph\n")
print(html)
## -----------------------------------------------------------------------------
# Fixture with one heading and two paragraphs. The tag names, the id and the
# class are exactly what the four selectors below match on; without this markup
# every query comes back empty.
html <- minimal_html("
  <h1>This is a heading</h1>
  <p id='first'>This is a paragraph</p>
  <p class='important'>This is an important paragraph</p>
")
## -----------------------------------------------------------------------------
# Tag selector, single match: the first <h1>.
html %>% html_element("h1")
# Tag selector, all matches: every <p>.
html %>% html_elements("p")
# Class selector: elements carrying class "important".
html %>% html_elements(".important")
# Id selector: the element whose id is "first".
html %>% html_elements("#first")
## -----------------------------------------------------------------------------
# Fixture: an unordered list with three items. Each item must be an <li> for
# the "li" selector below to find it, and the ampersand is written as the
# entity &amp; so the markup is valid HTML.
html <- minimal_html("
  <ul>
    <li>apple &amp; pear</li>
    <li>banana</li>
    <li>pineapple</li>
  </ul>
")
# Text of each list item, formatted by html_text2().
html %>%
  html_elements("li") %>%
  html_text2()
## -----------------------------------------------------------------------------
# Same items through html_text(), which returns the raw text content.
html %>%
  html_elements("li") %>%
  html_text()
## -----------------------------------------------------------------------------
# Fixture: two paragraphs whose source text is broken across several lines.
# The <p> tags delimit the paragraphs; without them there is nothing for the
# html_text2() / html_text() comparison below to show.
html <- minimal_html("
  <p>This is
  a
  paragraph.</p>
  <p>This is another paragraph.
  It has two sentences.</p>
")
## -----------------------------------------------------------------------------
# html_text2() formats the text approximately as a browser would display it.
# cat() writes the string as-is so embedded newlines are visible.
html %>%
  html_element("body") %>%
  html_text2() %>%
  cat()
## -----------------------------------------------------------------------------
# html_text() returns the underlying text unchanged, including the line breaks
# and indentation from the source string.
html %>%
  html_element("body") %>%
  html_text() %>%
  cat()
## -----------------------------------------------------------------------------
# Fixture: one link and one image. The <a> and <img> elements (and their href,
# src and width attributes) are what the html_attr() calls below read; without
# them every call returns an empty vector.
# NOTE(review): the URLs are illustrative placeholders. They are only read
# back as attribute strings here and are not requested by this code.
html <- minimal_html("
  <p><a href='https://en.wikipedia.org/wiki/Cat'>cats</a></p>
  <img src='https://cataas.com/cat' width='100' height='200'>
")
## -----------------------------------------------------------------------------
# Link target of every <a>.
html %>%
  html_elements("a") %>%
  html_attr("href")
# Source location of every <img>.
html %>%
  html_elements("img") %>%
  html_attr("src")
## -----------------------------------------------------------------------------
# html_attr() returns attribute values as character strings ...
html %>%
  html_elements("img") %>%
  html_attr("width")
# ... so numeric attributes have to be converted explicitly.
html %>%
  html_elements("img") %>%
  html_attr("width") %>%
  as.integer()
## -----------------------------------------------------------------------------
# Fixture: a two-column table with a header row (x, y) and three data rows.
# html_table() needs real <table>/<tr>/<th>/<td> markup to work from.
html <- minimal_html("
  <table>
    <tr>
      <th>x</th>
      <th>y</th>
    </tr>
    <tr>
      <td>1.5</td>
      <td>2.7</td>
    </tr>
    <tr>
      <td>4.9</td>
      <td>1.3</td>
    </tr>
    <tr>
      <td>7.2</td>
      <td>8.1</td>
    </tr>
  </table>
")
## -----------------------------------------------------------------------------
# Select the table and convert it to a data frame. html_element() replaces the
# superseded html_node(), matching the rest of this script.
html %>%
  html_element("table") %>%
  html_table()
## -----------------------------------------------------------------------------
# Fixture: one <li> per character. The name is in <b>, the species in <i> and
# the weight in an element with class "weight" -- the three selectors queried
# below. Not every item has all three pieces (Yoda has no species, R4-P17 has
# no weight), which is what makes the comparison below interesting.
html <- minimal_html("
  <ul>
    <li><b>C-3PO</b> is a <i>droid</i> that weighs <span class='weight'>167 kg</span></li>
    <li><b>R2-D2</b> is a <i>droid</i> that weighs <span class='weight'>96 kg</span></li>
    <li><b>Yoda</b> weighs <span class='weight'>66 kg</span></li>
    <li><b>R4-P17</b> is a <i>droid</i></li>
  </ul>
")
## -----------------------------------------------------------------------------
# html_elements() on the whole document returns only the matches that exist,
# so the three results can have different lengths and cannot be lined up.
html %>% html_elements("b") %>% html_text2()
html %>% html_elements("i") %>% html_text2()
html %>% html_elements(".weight") %>% html_text2()
## -----------------------------------------------------------------------------
# Select one node per character first, then use html_element() (singular) on
# that set: it yields exactly one result per input node, NA where absent.
characters <- html %>% html_elements("li")
characters %>% html_element("b") %>% html_text2()
characters %>% html_element("i") %>% html_text2()
characters %>% html_element(".weight") %>% html_text2()
## -----------------------------------------------------------------------------
# Because each column now has one entry per character, the three vectors can
# be combined row-wise into a data frame.
data.frame(
  name = characters %>% html_element("b") %>% html_text2(),
  species = characters %>% html_element("i") %>% html_text2(),
  weight = characters %>% html_element(".weight") %>% html_text2()
)