r class 5 -data visualization

98
Data Visualization http://nycdatascience.com/part4_en/ 1 of 98

Upload: vivian-s-zhang

Post on 26-Jan-2015

105 views

Category:

Education


2 downloads

DESCRIPTION

RSVP for our 5-day intensive R beginner classes at www.nycdatascience.com

TRANSCRIPT

Page 1: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

1 of 98

Page 2: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

2 of 98

Page 3: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

3 of 98

Page 4: R class 5 -data visualization

# Read the Anscombe data; the first line of the file holds the column names.
data <- read.table("data/anscombe.txt", header = TRUE)
# Drop the first column so only x1..x4 and y1..y4 remain.
data <- data[, -1]
# Preview the first rows.
head(data)

x1 x2 x3 x4 y1 y2 y3 y4

1 10 10 10 8 8.04 9.14 7.46 6.58

2 8 8 8 8 6.95 8.14 6.77 5.76

3 13 13 13 8 7.58 8.74 12.74 7.71

4 9 9 9 8 8.81 8.77 7.11 8.84

5 11 11 11 8 8.33 9.26 7.81 8.47

6 14 14 14 8 9.96 8.10 8.84 7.04

Data Visualization http://nycdatascience.com/part4_en/

4 of 98

Page 5: R class 5 -data visualization

# Mean of every column in `data`.
colMeans(data)

x1 x2 x3 x4 y1 y2 y3 y4

9.0 9.0 9.0 9.0 7.5 7.5 7.5 7.5

# Correlation of each x column (1-4) with its paired y column (5-8).
# vapply() guarantees exactly one numeric value per pair.
vapply(1:4, function(i) cor(data[, i], data[, i + 4]), numeric(1))

[1] 0.816 0.816 0.816 0.817

Data Visualization http://nycdatascience.com/part4_en/

5 of 98

Page 6: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

6 of 98

Page 7: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

7 of 98

Page 8: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

8 of 98

Page 9: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

9 of 98

Page 10: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

10 of 98

Page 11: R class 5 -data visualization

# Scatter plot of `dist` (y) against `speed` (x) from the built-in `cars` data.
plot(cars$dist~cars$speed)

Data Visualization http://nycdatascience.com/part4_en/

11 of 98

Page 12: R class 5 -data visualization

# Plot `dist` against its index; type='l' joins the values with a line.
plot(cars$dist,type='l')

Data Visualization http://nycdatascience.com/part4_en/

12 of 98

Page 13: R class 5 -data visualization

# Plot `dist` against its index; type='h' draws each value as a vertical line.
plot(cars$dist,type='h')

Data Visualization http://nycdatascience.com/part4_en/

13 of 98

Page 14: R class 5 -data visualization

# Histogram of `dist` using the default binning.
hist(cars$dist)

Data Visualization http://nycdatascience.com/part4_en/

14 of 98

Page 15: R class 5 -data visualization

library(lattice)

# Draw 50 values from 1:3 with replacement and chart how often each occurs.
num <- sample(1:3, size = 50, replace = TRUE)
barchart(table(num))

Data Visualization http://nycdatascience.com/part4_en/

15 of 98

Page 16: R class 5 -data visualization

# Quantile-quantile plot of 100 standard-normal draws (lattice).
qqmath(rnorm(100))

Data Visualization http://nycdatascience.com/part4_en/

16 of 98

Page 17: R class 5 -data visualization

# Strip plot of Sepal.Length, one panel per Species, laid out as 1 column x 3 rows.
stripplot(~ Sepal.Length | Species, data = iris,layout=c(1,3))

Data Visualization http://nycdatascience.com/part4_en/

17 of 98

Page 18: R class 5 -data visualization

# Overlaid density curves of Sepal.Length, one per Species; raw points are not drawn.
densityplot(~ Sepal.Length, groups=Species, data = iris,plot.points=FALSE)

Data Visualization http://nycdatascience.com/part4_en/

18 of 98

Page 19: R class 5 -data visualization

# Box-and-whisker plot of Sepal.Length (x) for each Species (y).
bwplot(Species~ Sepal.Length, data = iris)

Data Visualization http://nycdatascience.com/part4_en/

19 of 98

Page 20: R class 5 -data visualization

# Scatter plot of Sepal.Width against Sepal.Length, with points grouped by Species.
xyplot(Sepal.Width~ Sepal.Length, groups=Species, data = iris)

Data Visualization http://nycdatascience.com/part4_en/

20 of 98

Page 21: R class 5 -data visualization

# Scatter-plot matrix of the first four columns of iris.
splom(iris[1:4])

Data Visualization http://nycdatascience.com/part4_en/

21 of 98

Page 22: R class 5 -data visualization

# Histogram of Sepal.Length, one panel per Species, laid out as 1 column x 3 rows.
histogram(~ Sepal.Length | Species, data = iris,layout=c(1,3))

Data Visualization http://nycdatascience.com/part4_en/

22 of 98

Page 23: R class 5 -data visualization

library(plyr)

# Surface function z = sin(x^2/2 - y^2/4) * cos(2x - e^y); vectorised over x and y.
func3d <- function(x, y) {
  phase <- x^2 / 2 - y^2 / 4
  sin(phase) * cos(2 * x - exp(y))
}

# Evaluate func3d on a 30 x 30 grid over [0, 2] x [0, 2].
vec1 <- vec2 <- seq(0, 2, length.out = 30)
para <- expand.grid(x = vec1, y = vec2)
# mdply() calls func3d once per row of `para`, passing the columns as arguments;
# the result keeps x and y and adds the computed value as column V1.
result6 <- mdply(.data = para, .fun = func3d)

Data Visualization http://nycdatascience.com/part4_en/

23 of 98

Page 24: R class 5 -data visualization

library(lattice)

# 3D surface of V1 over the x-y grid stored in `result6`.
wireframe(
  V1 ~ x * y,
  data = result6,
  scales = list(arrows = FALSE),  # numeric tick labels rather than axis arrows
  drape = TRUE,                   # colour the surface
  colorkey = FALSE                # no colour legend
)

Data Visualization http://nycdatascience.com/part4_en/

24 of 98

Page 25: R class 5 -data visualization

library(ggplot2)

# Scatter plot of highway (hwy) against city (cty) mileage from `mpg`.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

25 of 98

Page 26: R class 5 -data visualization

# Text description of the plot object: its data, mappings, faceting and layers.
summary(p)

data: manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class [234x11]

mapping: x = cty, y = hwy

faceting: facet_null()

-----------------------------------

geom_point: na.rm = FALSE

stat_identity:

position_identity: (width = NULL, height = NULL)

Data Visualization http://nycdatascience.com/part4_en/

26 of 98

Page 27: R class 5 -data visualization

# Scatter plot with point colour mapped to year, treated as a factor.
p <- ggplot(mpg, aes(x = cty, y = hwy, colour = factor(year))) +
  geom_point()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

27 of 98

Page 28: R class 5 -data visualization

# Smoothed trend only; colour is mapped plot-wide, so each year gets its own curve.
p <- ggplot(mpg, aes(x = cty, y = hwy, colour = factor(year))) +
  geom_smooth()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

28 of 98

Page 29: R class 5 -data visualization

# Colour is mapped on the point layer only, so the smoother fits all years together.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(colour = factor(year))) +
  geom_smooth()

Data Visualization http://nycdatascience.com/part4_en/

29 of 98

Page 30: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

30 of 98

Page 31: R class 5 -data visualization

# As before, with the two year colours chosen by hand.
year_colours <- scale_color_manual(values = c("blue2", "red4"))
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(colour = factor(year))) +
  geom_smooth() +
  year_colours

Data Visualization http://nycdatascience.com/part4_en/

31 of 98

Page 32: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

32 of 98

Page 33: R class 5 -data visualization

# Coloured points plus smoother, split into one panel per year in a single column.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(colour = factor(year))) +
  geom_smooth() +
  scale_color_manual(values = c("blue2", "red4")) +
  facet_wrap(~year, ncol = 1)

Data Visualization http://nycdatascience.com/part4_en/

33 of 98

Page 34: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

34 of 98

Page 35: R class 5 -data visualization

# Faceted bubble chart: colour shows vehicle class, point size shows displacement.
p <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  # Semi-transparent, jittered points reduce overplotting.
  geom_point(
    aes(colour = class, size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth() +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~year, ncol = 1) +
  # opts(title = ...) has been removed from ggplot2; the title is set in labs().
  labs(
    title = "Vehicle model and fuel consumption",
    y = "Highway miles per gallon",
    x = "Urban miles per gallon",
    size = "Displacement",
    colour = "Model"
  )

Data Visualization http://nycdatascience.com/part4_en/

35 of 98

Page 36: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

36 of 98

Page 37: R class 5 -data visualization

# Jittered, semi-transparent points coloured by year and sized by displacement,
# with a smoothed trend over all points.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = factor(year), size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  stat_smooth() +
  scale_color_manual(values = c("steelblue", "red4")) +
  scale_size_continuous(range = c(4, 10))

Data Visualization http://nycdatascience.com/part4_en/

37 of 98

Page 38: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

38 of 98

Page 39: R class 5 -data visualization

library(ggplot2)

# Histogram of Sepal.Length with ggplot2's default binning.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

39 of 98

Page 40: R class 5 -data visualization

# Histogram of Sepal.Length with an explicit bin width and bar styling.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(
    binwidth = 0.1,    # width of each bin
    fill = "skyblue",  # bar fill colour
    colour = "black"   # bar outline colour
  )

Data Visualization http://nycdatascience.com/part4_en/

40 of 98

Page 41: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

41 of 98

Page 42: R class 5 -data visualization

# Density-scaled histogram with a dashed kernel density curve on top.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  # y = ..density.. puts the bars on the same scale as the density curve.
  geom_histogram(aes(y = ..density..), fill = "skyblue", color = "black") +
  geom_density(color = "black", linetype = 2, adjust = 2)

Data Visualization http://nycdatascience.com/part4_en/

42 of 98

Page 43: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

43 of 98

Page 44: R class 5 -data visualization

# Density-scaled histogram with three density curves that differ only in
# bandwidth adjustment (0.5, 1 and 2) and line type.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(
    aes(y = ..density..),  # y is density rather than count
    fill = "gray60",
    color = "gray"
  ) +
  geom_density(color = "black", linetype = 1, adjust = 0.5) +
  geom_density(color = "black", linetype = 2, adjust = 1) +
  geom_density(color = "black", linetype = 3, adjust = 2)

Data Visualization http://nycdatascience.com/part4_en/

44 of 98

Page 45: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

45 of 98

Page 46: R class 5 -data visualization

p <- ggplot(iris,aes(x=Sepal.Length,fill=Species)) + geom_density(alpha=0.5,color='gra

print(p)

Data Visualization http://nycdatascience.com/part4_en/

46 of 98

Page 47: R class 5 -data visualization

# Box plot of Sepal.Length per Species, filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

47 of 98

Page 48: R class 5 -data visualization

# Violin plot of Sepal.Length per Species, filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_violin()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

48 of 98

Page 49: R class 5 -data visualization

# Gray, semi-transparent violins with the observations drawn as centred dots.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  # The constant fill on the violin layer overrides the Species fill mapping.
  geom_violin(fill = "gray", alpha = 0.5) +
  # Bin along y and stack the dots symmetrically around each violin's centre.
  geom_dotplot(binaxis = "y", stackdir = "center")
print(p)

Data Visualization http://nycdatascience.com/part4_en/

49 of 98

Page 50: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

50 of 98

Page 51: R class 5 -data visualization

# Bar chart of the number of rows in each vehicle class.
p <- ggplot(mpg, aes(x = class)) +
  geom_bar()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

51 of 98

Page 52: R class 5 -data visualization

# Convert year to a factor so it maps to a discrete fill scale.
mpg$year <- factor(mpg$year)
# Stacked bar chart: count per class, split by year, with black outlines.
p <- ggplot(mpg, aes(x = class, fill = year)) +
  geom_bar(color = "black")

Data Visualization http://nycdatascience.com/part4_en/

52 of 98

Page 53: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

53 of 98

Page 54: R class 5 -data visualization

# Same counts with the year bars placed side by side instead of stacked.
p <- ggplot(mpg, aes(x = class, fill = year)) +
  geom_bar(color = "black", position = position_dodge())

Data Visualization http://nycdatascience.com/part4_en/

54 of 98

Page 55: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

55 of 98

Page 56: R class 5 -data visualization

# Pie chart: one full-width stacked bar of class counts, with y mapped to angle.
p <- ggplot(mpg, aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

Data Visualization http://nycdatascience.com/part4_en/

56 of 98

Page 57: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

57 of 98

Page 58: R class 5 -data visualization

# Fix the random seed so the simulated data are reproducible.
set.seed(1)
# Randomly generate 100 wind directions and divide them into 16 intervals.
dir <- cut_interval(runif(100, 0, 360), n = 16)
# Randomly generate 100 wind speeds and divide them into 4 intensity levels.
mag <- cut_interval(rgamma(100, 15), 4)
sample <- data.frame(dir = dir, mag = mag)
# Map wind direction to the x-axis, frequency to the y-axis and speed to the
# fill colour, then switch to polar coordinates.
p <- ggplot(sample, aes(x = dir, fill = mag)) +
  geom_bar() +
  coord_polar()

Data Visualization http://nycdatascience.com/part4_en/

58 of 98

Page 59: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

59 of 98

Page 60: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

60 of 98

Page 61: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

61 of 98

Page 62: R class 5 -data visualization

# Read the data; the first line of the file holds the column names.
data <- read.csv("data/soft_impact.csv", header = TRUE)
library(reshape2)
# Reshape to long form: one row per (Year, variable) pair with its value.
data.melt <- melt(data, id.vars = "Year")
# Area chart over Year; position_fill() rescales each year's stack to sum to 1.
p <- ggplot(
  data.melt,
  aes(x = Year, y = value, group = variable, fill = variable)
) +
  geom_area(color = "black", size = 0.3, position = position_fill()) +
  scale_fill_brewer()

Data Visualization http://nycdatascience.com/part4_en/

62 of 98

Page 63: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

63 of 98

Page 64: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

64 of 98

Page 65: R class 5 -data visualization

# Plain scatter plot of hwy against cty.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point()
print(p)

Data Visualization http://nycdatascience.com/part4_en/

65 of 98

Page 66: R class 5 -data visualization

# Year as a factor so it maps to a discrete colour scale.
mpg$year <- factor(mpg$year)
# Scatter plot with point colour by year.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = year))
print(p)

Data Visualization http://nycdatascience.com/part4_en/

66 of 98

Page 67: R class 5 -data visualization

# Year as a factor so it maps to discrete colour and shape scales.
mpg$year <- factor(mpg$year)
# Scatter plot with both colour and point shape by year.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = year, shape = year))
print(p)

Data Visualization http://nycdatascience.com/part4_en/

67 of 98

Page 68: R class 5 -data visualization

p <- ggplot(data=mpg,aes(x=cty,y=hwy)) + geom_point(aes(color=year),alpha=0.5,position

print(p)

Data Visualization http://nycdatascience.com/part4_en/

68 of 98

Page 69: R class 5 -data visualization

# Jittered, semi-transparent points coloured by year, plus a linear fit.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = year), alpha = 0.5, position = "jitter") +
  geom_smooth(method = "lm")
print(p)

Data Visualization http://nycdatascience.com/part4_en/

69 of 98

Page 70: R class 5 -data visualization

# As above, with point size mapped to displacement and a 4-10 size range.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(color = year, size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth(method = "lm") +
  scale_size_continuous(range = c(4, 10))

Data Visualization http://nycdatascience.com/part4_en/

70 of 98

Page 71: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

71 of 98

Page 72: R class 5 -data visualization

# Points coloured by class and sized by displacement, with a smoother,
# drawn in one panel per year stacked in a single column.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(
    aes(colour = class, size = displ),
    alpha = 0.5,
    position = "jitter"
  ) +
  geom_smooth() +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~year, ncol = 1)

Data Visualization http://nycdatascience.com/part4_en/

72 of 98

Page 73: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

73 of 98

Page 74: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

74 of 98

Page 75: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

75 of 98

Page 76: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

76 of 98

Page 77: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

77 of 98

Page 78: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

78 of 98

Page 79: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

79 of 98

Page 80: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

80 of 98

Page 81: R class 5 -data visualization

# One fill colour per bar: steel blue where unemploy is under 8000, dark red otherwise.
# Index the column as a vector; `economics[rows, 'unemploy']` yields a one-column
# table (not a vector) when `economics` is a tibble.
fillcolor <- ifelse(economics$unemploy[440:470] < 8000, 'steelblue', 'red4')
# Bar chart of unemploy over date for rows 440-470; stat = 'identity' plots the
# values as given instead of counting rows.
p <- ggplot(economics[440:470, ], aes(x = date, y = unemploy)) +
  geom_bar(stat = 'identity', fill = fillcolor)

Data Visualization http://nycdatascience.com/part4_en/

81 of 98

Page 82: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

82 of 98

Page 83: R class 5 -data visualization

# Vertical line from 0 up to each psavert value, topped by a dark-red point,
# for rows 300-470 of `economics`.
p <- ggplot(economics[300:470, ], aes(x = date, ymax = psavert, ymin = 0)) +
  geom_linerange(color = "grey20", size = 0.5) +
  geom_point(aes(y = psavert), color = "red4") +
  theme_bw()

Data Visualization http://nycdatascience.com/part4_en/

83 of 98

Page 84: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

84 of 98

Page 85: R class 5 -data visualization

# Colour per observation: steel blue for dates strictly between 1980-01-01 and
# 1990-01-01, dark red otherwise.
fill.color <- ifelse(
  economics$date > as.Date("1980-01-01") &
    economics$date < as.Date("1990-01-01"),
  "steelblue", "red4"
)
p <- ggplot(economics, aes(x = date, ymax = psavert, ymin = 0)) +
  geom_linerange(color = fill.color, size = 0.9) +
  # annotate() draws the label once; geom_text() with a constant label would
  # redraw it for every row of `economics`.
  annotate("text", x = as.Date("1985-01-01"), y = 13, label = "1980'") +
  theme_bw()

Data Visualization http://nycdatascience.com/part4_en/

85 of 98

Page 86: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

86 of 98

Page 87: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

87 of 98

Page 88: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

88 of 98

Page 89: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

89 of 98

Page 90: R class 5 -data visualization

library(ggplot2)

# World outline drawn on an orthographic (globe) projection.
world <- map_data("world")
worldmap <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_path(color = "gray10", size = 0.3) +
  # Single "*" marker at longitude 114, latitude 30. annotate() draws it once;
  # a geom_point() with constant x/y would redraw it for every row of `world`.
  annotate("point", x = 114, y = 30, size = 10, shape = "*") +
  # Graticule: parallels every 30 degrees, meridians every 45 degrees.
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45) +
  coord_map("ortho", orientation = c(30, 120, 0)) +
  # Keep only the grid lines on a white background; hide all axis decoration.
  theme(
    panel.grid.major = element_line(colour = "gray50"),
    panel.background = element_rect(fill = "white"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )

Data Visualization http://nycdatascience.com/part4_en/

90 of 98

Page 91: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

91 of 98

Page 92: R class 5 -data visualization

map <- map_data('state')

arrests <- USArrests

names(arrests) <- tolower(names(arrests))

arrests$region <- tolower(rownames(USArrests))

usmap <- ggplot(data=arrests) +

geom_map(map =map,aes(map_id = region,fill = murder),color='gray40' ) +

expand_limits(x = map$long, y = map$lat) +

scale_fill_continuous(high='red2',low='white') +

theme_bw() +

theme(panel.grid.major = element_blank(),

panel.background = element_blank(),

axis.text=element_blank(),

axis.ticks=element_blank(),

axis.title=element_blank(),

legend.position = c(0.95,0.28),

legend.background=element_rect(fill="white", colour="white"))+ coord_map('mercat

Data Visualization http://nycdatascience.com/part4_en/

92 of 98

Page 93: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

93 of 98

Page 94: R class 5 -data visualization

library(ggmap)

library(XML)

webpage <-'http://data.earthquake.cn/datashare/globeEarthquake_csn.html'

tables <- readHTMLTable(webpage,stringsAsFactors = FALSE)

raw <- tables[[6]]

data <- raw[,c(1,3,4)]

names(data) <- c('date','lan','lon')

data$lan <- as.numeric(data$lan)

data$lon <- as.numeric(data$lon)

data$date <- as.Date(data$date, "%Y-%m-%d")

#Read the map data from Google by the ggmap package, and mark the previous data on the

earthquake <- ggmap(get_googlemap(center = 'china', zoom=4,maptype='terrain'),extent='

geom_point(data=data,aes(x=lon,y=lan),colour = 'red',alpha=0.7)+

theme(legend.position = "none")

Data Visualization http://nycdatascience.com/part4_en/

94 of 98

Page 95: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

95 of 98

Page 96: R class 5 -data visualization

library(googleVis)

library(WDI)

DF <- WDI(country=c("CN","RU","BR","ZA","IN",'DE','AU','CA','FR','IT','JP','MX','GB','

M <- gvisMotionChart(DF, idvar="country", timevar="year",

xvar='EN.ATM.CO2E.KT',

yvar='NY.GDP.MKTP.CD')

plot(M)

Data Visualization http://nycdatascience.com/part4_en/

96 of 98

Page 97: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

97 of 98

Page 98: R class 5 -data visualization

Data Visualization http://nycdatascience.com/part4_en/

98 of 98