the data visualization with r:an example- visualizing obesity across united states by using data...

5
## LOAD THE PACKAGES #### library(rvest) library(ggplot2) library(dplyr) library(scales) ## LOAD THE DATA #### obesity = read_html("https://en.wikipedia.org/wiki/Obesity_in_the_United_ States") obesity = obesity %>% html_nodes("table") %>% .[[1]]%>% html_table(fill=T) ## CLEAN THE DATA #### str(obesity) for(i in 2:4){ obesity[,i] = gsub("%", "", obesity[,i]) obesity[,i] = as.numeric(obesity[,i])

Upload: volkan-oban

Post on 16-Apr-2017

119 views

Category:

Data & Analytics


0 download

TRANSCRIPT

Page 1: The data visualization with R:An Example- Visualizing obesity across united states by using data from wikipedia

## LOAD THE PACKAGES ####
# The section header must sit on its own line; when it shares a line with
# library(rvest) the call becomes part of the comment and rvest is not loaded.
library(rvest)    # read_html(), html_nodes(), html_table()
library(ggplot2)  # map_data(), ggplot(), geom_polygon(), coord_map()
library(dplyr)    # %>%, group_by(), summarise()
library(scales)   # pretty_breaks()

## LOAD THE DATA ####
# Download and parse the Wikipedia article. This statement has to be on its
# own line: fused onto the header comment it never runs and `obesity` is
# undefined when the pipeline below starts.
obesity <- read_html("https://en.wikipedia.org/wiki/Obesity_in_the_United_States")

# Take the first <table> node on the page and convert it to a data frame.
# NOTE(review): assumes the first table is still the per-state obesity table;
# confirm against the live page, since its layout can change.
obesity <- obesity %>%
  html_nodes("table") %>%
  .[[1]] %>%
  html_table(fill = TRUE)

## CLEAN THE DATA ####
# Inspect the structure of the scraped table.
str(obesity)

# Columns 2 to 4 are treated as percentage text: remove every "%" character
# and convert the remainder to numeric. `[[i]]` extracts the column as a plain
# vector for both data frames and tibbles, whereas `[, i]` on a tibble returns
# a one-column tibble that gsub()/as.numeric() cannot handle.
for (i in 2:4) {
  obesity[[i]] <- as.numeric(gsub("%", "", obesity[[i]], fixed = TRUE))
}

# Show the column names before and make them syntactically valid R names
# (e.g. spaces become dots) so they can be used with `$` and inside aes().
names(obesity)
names(obesity) <- make.names(names(obesity))

Page 2: The data visualization with R:An Example- Visualizing obesity across united states by using data from wikipedia

# Show the cleaned column names of the obesity table.
names(obesity)

# Fetch the "state" map outline data as a data frame of polygon vertices and
# inspect its structure.
states <- map_data(map = "state")
str(states)

To merge the two datasets (obesity and states) by region, we first need to create a new variable (region) in the obesity dataset.

# Create a new variable holding the lower-cased state name. It is used as the
# key when merging with the map data. The assignment must be on its own line;
# fused onto the comment it never runs.
obesity$region <- tolower(obesity$State.and.District.of.Columbia)

Merge the datasets.

# Left-join the obesity figures onto the map polygons by `region`; all.x keeps
# every polygon vertex even when a region has no matching obesity row.
states <- merge(states, obesity, by = "region", all.x = TRUE)

# merge() re-sorts rows by the key, so restore the vertex drawing order that
# map_data() records in its `order` column; otherwise geom_polygon() may
# connect vertices out of sequence.
states <- states[order(states$order), ]

str(states)

## MAKE THE PLOT ####
# adults
# Map of state polygons shaded by the Obese.adults column. Polygons are
# outlined in white and regions with a missing value are drawn in black.
ggplot(
  data = states,
  mapping = aes(x = long, y = lat, group = group, fill = Obese.adults)
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    name = "Percent",
    low = "#feceda",
    high = "#c81f49",
    guide = "colorbar",
    na.value = "black",
    breaks = pretty_breaks(n = 5)
  ) +
  labs(title = "Prevalence of Obesity in Adults") +
  coord_map()

Page 3: The data visualization with R:An Example- Visualizing obesity across united states by using data from wikipedia

Next, plot the prevalence of obesity in children.

# children
# Same map as the adult plot, shaded by Obese.children.and.adolescents.
# The ggplot() call must start on its own line: fused onto the comment it is
# never evaluated and the remaining `geom_polygon(...) + ...` has no plot to
# attach to.
ggplot(
  states,
  aes(
    x = long, y = lat, group = group,
    fill = Obese.children.and.adolescents
  )
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    name = "Percent",
    low = "#feceda",
    high = "#c81f49",
    guide = "colorbar",
    na.value = "black",
    breaks = pretty_breaks(n = 5)
  ) +
  labs(title = "Prevalence of Obesity in Children") +
  coord_map()

Page 4: The data visualization with R:An Example- Visualizing obesity across united states by using data from wikipedia

If you would like to show the state names on the map, use the code below to create a new dataset.

statenames = states %>% group_by(region) %>%

summarise(

long = mean(range(long)),

lat = mean(range(lat)),

group = mean(group),

Obese.adults = mean(Obese.adults),

Obese.children.and.adolescents = mean(Obese.children.and.adolescents)