# Script form of an rvest walkthrough: each `## ----` marker below starts one
# knitr chunk. The script parses small inline HTML documents with
# minimal_html() and then queries them with CSS selectors.
#
# NOTE(review): in the previous revision the whole script was collapsed onto a
# few lines and the HTML tags inside every minimal_html() literal were missing
# (only the rendered text survived), so none of the selectors below could match
# anything. The markup has been reconstructed from the surviving text and from
# the selectors/attributes that the code queries. Attribute values (URLs, image
# sizes) and the contents of the final list fixture could not be recovered from
# the damaged text -- confirm them against the original vignette source.

## ----echo=FALSE---------------------------------------------------------------
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## -----------------------------------------------------------------------------
library(rvest)

## -----------------------------------------------------------------------------
# Requires network access.
html <- read_html("http://rvest.tidyverse.org/")
class(html)

## -----------------------------------------------------------------------------
html <- minimal_html("
  <p>This is a paragraph</p>
")
html

## -----------------------------------------------------------------------------
# One heading plus two paragraphs: one addressable by id, one by class.
html <- minimal_html("
  <h1>This is a heading</h1>
  <p id='first'>This is a paragraph</p>
  <p class='important'>This is an important paragraph</p>
")

## -----------------------------------------------------------------------------
html %>% html_element("h1")
html %>% html_elements("p")
html %>% html_elements(".important")
html %>% html_elements("#first")

## -----------------------------------------------------------------------------
# `&amp;` is the HTML entity for a literal ampersand.
html <- minimal_html("
  <ol>
    <li>apple &amp; pear</li>
    <li>banana</li>
    <li>pineapple</li>
  </ol>
")
html %>%
  html_elements("li") %>%
  html_text2()

## -----------------------------------------------------------------------------
html %>%
  html_elements("li") %>%
  html_text()

## -----------------------------------------------------------------------------
# The whitespace inside these paragraphs is deliberately irregular so that the
# output of html_text2() and html_text() in the next two chunks can be compared.
html <- minimal_html("<body>
  <p>
  This is
  a
  paragraph.</p><p>This is another paragraph.

  It has two sentences.</p>
")

## -----------------------------------------------------------------------------
html %>%
  html_element("body") %>%
  html_text2() %>%
  cat()

## -----------------------------------------------------------------------------
html %>%
  html_element("body") %>%
  html_text() %>%
  cat()

## -----------------------------------------------------------------------------
# NOTE(review): href/src/width/height values are placeholders chosen during
# reconstruction; verify against the original.
html <- minimal_html("
  <p><a href='https://en.wikipedia.org/wiki/Cat'>cats</a></p>
  <img src='https://cataas.com/cat' width='100' height='200'>
")

## -----------------------------------------------------------------------------
html %>%
  html_elements("a") %>%
  html_attr("href")

html %>%
  html_elements("img") %>%
  html_attr("src")

## -----------------------------------------------------------------------------
# The first call shows the attribute value as extracted; the second converts it
# explicitly with as.integer().
html %>%
  html_elements("img") %>%
  html_attr("width")

html %>%
  html_elements("img") %>%
  html_attr("width") %>%
  as.integer()

## -----------------------------------------------------------------------------
html <- minimal_html("
  <table>
    <tr>
      <th>x</th>
      <th>y</th>
    </tr>
    <tr>
      <td>1.5</td>
      <td>2.7</td>
    </tr>
    <tr>
      <td>4.9</td>
      <td>1.3</td>
    </tr>
    <tr>
      <td>7.2</td>
      <td>8.1</td>
    </tr>
  </table>
")

## -----------------------------------------------------------------------------
# html_element() replaces the superseded html_node(), matching the rest of this
# script.
html %>%
  html_element("table") %>%
  html_table()

## -----------------------------------------------------------------------------
# NOTE(review): this fixture was an empty string in the damaged revision. The
# list below is a reconstruction that supplies the <li>, <b>, <i> and .weight
# nodes the following chunks select; some items intentionally lack an <i> or a
# .weight node. Verify the exact content against the original.
html <- minimal_html("
  <ul>
    <li><b>C-3PO</b> is a <i>droid</i> that weighs <span class='weight'>167 kg</span></li>
    <li><b>R2-D2</b> is a <i>droid</i> that weighs <span class='weight'>96 kg</span></li>
    <li><b>Yoda</b> weighs <span class='weight'>66 kg</span></li>
    <li><b>R4-P17</b> is a <i>droid</i></li>
  </ul>
")

## -----------------------------------------------------------------------------
# Selecting directly from the document collects every match, regardless of
# which list item it belongs to.
html %>%
  html_elements("b") %>%
  html_text2()

html %>%
  html_elements("i") %>%
  html_text2()

html %>%
  html_elements(".weight") %>%
  html_text2()

## -----------------------------------------------------------------------------
# Select the list items first, then query inside each item with html_element().
characters <- html %>% html_elements("li")

characters %>%
  html_element("b") %>%
  html_text2()

characters %>%
  html_element("i") %>%
  html_text2()

characters %>%
  html_element(".weight") %>%
  html_text2()

## -----------------------------------------------------------------------------
# Assemble the per-item fields into one data frame.
data.frame(
  name = characters %>% html_element("b") %>% html_text2(),
  species = characters %>% html_element("i") %>% html_text2(),
  weight = characters %>% html_element(".weight") %>% html_text2()
)