数据分析的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。数据根据时间动态变化的可视化图形:时序图、动态堆积图和流图等等。
分组线条图 grouped line chart
library(ggplot2)
library(babynames)
library(dplyr)
library(hrbrthemes)
library(viridis)
# Keep only 3 names
don <- babynames %>%
filter(name %in% c("Ashley", "Patricia", "Helen")) %>%
filter(sex=="F")
# Plot
don %>%
ggplot( aes(x=year, y=n, group=name, color=name)) +
geom_line() +
scale_color_viridis(discrete = TRUE) +
ggtitle("Popularity of American names in the previous 30 years") +
theme_ipsum() +
ylab("Number of babies born")
面积图 Area
library(ggplot2)
library(hrbrthemes)
xValue <- 1:10
yValue <- abs(cumsum(rnorm(10)))
data <- data.frame(xValue,yValue)
ggplot(data, aes(x=xValue, y=yValue)) +
geom_area( fill="#69b3a2", alpha=0.4) +
geom_line(color="#69b3a2", size=2) +
geom_point(size=3, color="#69b3a2") +
theme_ipsum() +
ggtitle("Evolution of something")
面积堆积图 Stacked area chart
library(ggplot2)
library(dplyr)
time <- as.numeric(rep(seq(1,7),each=7))
value <- runif(49, 10, 100)
group <- rep(LETTERS[1:7],times=7)
data <- data.frame(time, value, group)
plotdata <- data %>%
group_by(time, group) %>%
summarise(n = sum(value)) %>%
mutate(percentage = n / sum(n))
ggplot(plotdata, aes(x=time, y=percentage, fill=group)) +
geom_area(alpha=0.6 , size=1, colour="white")+
scale_fill_viridis(discrete = T) +
theme_ipsum()
Streamgraph
# devtools::install_github("hrbrmstr/streamgraph")
library(streamgraph)
library(dplyr)
library(babynames)
babynames %>%
filter(grepl("^Kr", name)) %>%
group_by(year, name) %>%
tally(wt=n) %>%
streamgraph("name", "n", "year")
babynames %>%
filter(grepl("^I", name)) %>%
group_by(year, name) %>%
tally(wt=n) %>%
streamgraph("name", "n", "year", offset="zero", interpolate="linear") %>%
sg_legend(show=TRUE, label="I- names: ")
Time Series
library(ggplot2)
library(dplyr)
library(hrbrthemes)
data <- data.frame(
day = as.Date("2017-06-14") - 0:364,
value = runif(365) + seq(-140, 224)^2 / 10000
)
ggplot(data, aes(x=day, y=value)) +
geom_line( color="steelblue") +
geom_point() +
xlab("") +
theme_ipsum() +
theme(axis.text.x=element_text(angle=60, hjust=1)) +
scale_x_date(limit=c(as.Date("2017-01-01"),as.Date("2017-02-11"))) +
ylim(0,1.5)
library(dygraphs)
library(xts)
library(tidyverse)
library(lubridate)
data <- read.table("https://python-graph-gallery.com/wp-content/uploads/bike.csv", header=T, sep=",") %>% head(300)
data$datetime <- ymd_hms(data$datetime)
don <- xts(x = data$count, order.by = data$datetime)
dygraph(don) %>%
dyOptions(labelsUTC = TRUE, fillGraph=TRUE, fillAlpha=0.1, drawGrid = FALSE, colors="#D8AE5A") %>%
dyRangeSelector() %>%
dyCrosshair(direction = "vertical") %>%
dyHighlight(highlightCircleSize = 5, highlightSeriesBackgroundAlpha = 0.2, hideOnMouseOut = FALSE) %>%
dyRoller(rollPeriod = 1)