Data visualization with R: an example – visualizing obesity across the United States by using data...
TRANSCRIPT
## LOAD THE PACKAGES ####
# The section header and the first library() call were fused onto a single
# comment line, so rvest was never attached; they are separated here.
library(rvest)    # read_html(), html_nodes(), html_table()
library(ggplot2)  # ggplot(), map_data(), geom_polygon(), ...
library(dplyr)    # group_by(), summarise()
library(scales)   # pretty_breaks()
## LOAD THE DATA ####
# Download the Wikipedia article, then keep the first <table> element on the
# page and convert it to a data frame.
# (The read_html() call used to sit on the same line as the section header
# and was therefore commented out, leaving `obesity` undefined.)
obesity <- read_html("https://en.wikipedia.org/wiki/Obesity_in_the_United_States")
obesity <- obesity %>%
  html_nodes("table") %>%
  .[[1]] %>%
  # NOTE(review): `fill` appears to be deprecated in rvest >= 1.0.0, where
  # tables are always filled; it is kept so older versions still work.
  html_table(fill = TRUE)
## CLEAN THE DATA ####
str(obesity)

# Columns 2 to 4 are stored as text containing a percent sign (presumably
# values such as "30.1%"): strip the sign and convert to numeric.
# `[[i]]` is used instead of `[, i]` so the column is extracted as a plain
# vector whether `obesity` is a data.frame or a tibble.
for (i in 2:4) {
  obesity[[i]] <- as.numeric(gsub("%", "", obesity[[i]]))
}

# Make the column headers syntactically valid R names so they can be used
# with `$` and inside aes(); print them before and after for comparison.
names(obesity)
names(obesity) <- make.names(names(obesity))
names(obesity)

# Polygon outlines of the US states (requires the maps package).
states <- map_data("state")
str(states)
We will merge the two datasets (obesity and states) by region; therefore, we first need to create a new variable (region) in the obesity dataset.
# Create the merge key: the state name in lower case, stored in a column
# called `region` (the column the merge with `states` is keyed on).
# The assignment used to be fused onto the comment line and never ran.
obesity$region <- tolower(obesity$State.and.District.of.Columbia)
Merge the datasets.
# Left-join the obesity figures onto the polygon data: all.x = TRUE keeps
# every polygon row, with NA for regions that have no obesity record.
states <- merge(states, obesity, by = "region", all.x = TRUE)
# merge() does not guarantee the original row order, and geom_polygon()
# connects vertices in row order, so restore the drawing sequence using the
# `order` column supplied by map_data().
states <- states[order(states$order), ]
str(states)
## MAKE THE PLOT ####
# adults
# Choropleth of the adult obesity percentage: one filled polygon per map
# group, white borders, light-to-dark red gradient, black for missing data.
ggplot(
  data = states,
  mapping = aes(x = long, y = lat, group = group, fill = Obese.adults)
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    name = "Percent",
    low = "#feceda",
    high = "#c81f49",
    na.value = "black",
    breaks = pretty_breaks(n = 5),
    guide = "colorbar"
  ) +
  labs(title = "Prevalence of Obesity in Adults") +
  coord_map()
Plot the prevalence of obesity in children.
# children
# Choropleth of the child/adolescent obesity percentage, styled the same as
# the adult map. The ggplot() call used to be fused onto the comment line,
# which left the expression starting at geom_polygon() and failing.
ggplot(
  states,
  aes(
    x = long, y = lat, group = group,
    fill = Obese.children.and.adolescents
  )
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    name = "Percent",
    low = "#feceda",
    high = "#c81f49",
    guide = "colorbar",
    na.value = "black",
    breaks = pretty_breaks(n = 5)
  ) +
  labs(title = "Prevalence of Obesity in Children") +
  coord_map()
If you would like to show the state names on the map, use the code below to create a new dataset.
statenames = states %>% group_by(region) %>%
summarise(
long = mean(range(long)),
lat = mean(range(lat)),
group = mean(group),
Obese.adults = mean(Obese.adults),
Obese.children.and.adolescents = mean(Obese.children.and.adolescents)