ggplot2常用图表

R语言
可视化
ggplot2常用图表
作者

不止BI

发布于

2024年4月1日

配对

代码
library(tidyverse)
library(ggtext)
library(here)
#' https://ourworldindata.org/working-hours
df <- read_csv(here(
  "docs/blog/posts/ggplot2",
  "annual-working-hours-per-worker.csv"
))
df <- rename(df, Hours = 4)


highlight_countries <- c(
  "Vietnam",
  "Norway",
  "Germany",
  "China",
  "Bangladesh",
  "Ireland",
  "South Korea",
  "Singapore",
  "Myanmar",
  "Greece"
)

df %>%
  filter(Year == 1970 | Year == max(Year)) %>%
  group_by(Entity) %>%
  filter(n() == 2) %>%
  mutate(
    max_value_year = which.max(Hours),
    trend = ifelse(max_value_year == 1, "decrease", "increase")
  ) %>%
  ungroup() %>%
  mutate(
    highlight = case_when(
      !(Entity %in% highlight_countries) ~ "other",
      Entity %in% c("Myanmar", "Singapore") ~ "same",
      TRUE ~ trend
    )
  ) %>%
  ggplot(aes(factor(Year), Hours, group = Entity, color = highlight)) +
  geom_line(aes(size = ifelse(highlight == "other", 0.1, 0.7))) +
  # use 2 geoms to make sure highlighted countries' dots are placed on top
  geom_point(data = . %>% filter(highlight == "other"), size = 0.2) +
  geom_point(data = . %>% filter(highlight != "other")) +
  ggrepel::geom_text_repel(
    data = . %>% filter(highlight != "other"),
    aes(
      x = ifelse(Year == min(Year), 1 - 0.35, 2 + 0.35),
      label = glue::glue("{Entity} ({scales::number(Hours, accuracy = 1)})"),
      hjust = ifelse(Year == min(Year), 1, 0)
    ),
    size = 2.5,
    nudge_x = 0,
    direction = "y",
    family = "Fira Sans",
    segment.size = 0
  ) +
  scale_x_discrete(position = "top") +
  scale_size_identity() +
  coord_cartesian(clip = "off") +
  scale_color_manual(
    values = c(
      "other" = "grey60",
      "decrease" = "#092044",
      "increase" = "#C33C2E",
      "same" = colorspace::darken("#F0C94C", 0.2)
    )
  ) +
  guides(col = "none") +
  labs(
    title = "1970 和 2017 年平均每年工作时长",
    subtitle = "大多数国家的工时都 <b style='color:#092044'>下降</b> 了,这可以被解读为进步的标志。
      值得注意的例外是工时 <b style='color:#C33C2E'>上升</b> 的孟加拉国和中国。
      各国之间的工作时间仍然存在巨大差异。",
    caption = "**数据来源:** Huberman & Minns (2007); PWT 9.1 (2019), Our World in Data "
  ) +
  theme_minimal(base_family = "Fira Sans", base_size = 8) +
  theme(
    plot.background = element_rect(color = NA, fill = "white"),
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(color = "#ECEEF2", size = 5),
    text = element_text(color = "#555555"),
    axis.title = element_blank(),
    axis.text.x = element_text(size = 12, face = "bold", color = "grey38"),
    axis.text.y = element_blank(),
    plot.margin = margin(t = 6, l = 16, r = 16, b = 4),
    plot.title = element_text(
      family = "Playfair Display",
      size = 14,
      color = "grey12"
    ),
    plot.subtitle = element_textbox_simple(
      margin = margin(t = 6, b = 12)
    ),
    plot.caption = element_markdown(
      hjust = 0,
      margin = margin(t = 8)
    )
  )
图 1
a = 1

日历

代码
# 加载tidyverse并查看数据
library(tidyverse)
flights <- nycflights13::flights

# 按日期统计航班量
date_counts <- flights %>%
  mutate(date = make_date(year, month, day)) %>%
  count(date)

# 添加日期属性(中文月份和星期)
date_counts_w_labels <- date_counts %>%
  mutate(
    day = mday(date),
    # 设置locale显示中文月份和星期
    month = month(date, label = TRUE, abbr = FALSE),
    wday = wday(date, label = TRUE, week_start = 1),
    week = stringi::stri_datetime_fields(date)$WeekOfMonth
  )

# 创建第一个分面图(已适配中文显示)
labels_color <- 'grey30'
scheduled_color <- '#009E73'

date_counts_w_labels %>%
  ggplot(aes(wday, 7 - week)) +
  geom_tile(
    aes(fill = n),
    col = labels_color
  ) +
  facet_wrap(vars(month), ncol = 3) + # 分面标签自动显示中文月份
  coord_equal(expand = FALSE) +
  scale_fill_gradient(
    high = scheduled_color,
    low = colorspace::lighten(scheduled_color, 0.9),
    name = "定期航班数量" # 图例标题
  ) +
  theme_void() +
  labs(
    title = '周六离港航班较少', # 主标题
    subtitle = '基于2013年336,776个定期航班数据', # 副标题
    fill = '定期航班数量', # 图例标签
    caption = '数据来源:{nycflights13} R包 '
  ) +
  theme(
    legend.position = 'top',
    # 添加中文字体支持(需系统已安装)
    text = element_text(family = "SimHei", size = 12)
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = unit(15, 'cm'),
      barheight = unit(0.3, 'cm'),
      title.position = 'top',
      title.hjust = 0,
      title.vjust = 0,
      frame.colour = labels_color
    )
  )
图 2

条形图

library(tidyverse)
set.seed(23445)

# 创建基础数据
manufacturers <- mpg |>
  janitor::clean_names() |>
  mutate(manufacturer = str_to_title(manufacturer))

selected_manufacturers <- manufacturers |>
  filter(
    manufacturer %in% sample(unique(manufacturer), size = 6)
  )

# 基础条形图
selected_manufacturers |>
  ggplot(aes(x = manufacturer)) +
  geom_bar(fill = 'dodgerblue4') +
  labs(
    x = element_blank(),
    y = 'Number of cars',
    title = 'Selected brands in the {mpg} dataset'
  )

# 按频率排序
selected_manufacturers |>
  mutate(manufacturer = fct_infreq(manufacturer)) |>
  ggplot(aes(x = manufacturer)) +
  geom_bar(fill = 'dodgerblue4') +
  labs(
    x = element_blank(),
    y = 'Number of cars',
    title = 'Selected brands in the {mpg} dataset'
  )

# 水平条形图
horizontal_bars <- manufacturers |>
  mutate(
    manufacturer = fct_infreq(manufacturer) |> fct_rev()
  ) |>
  ggplot(aes(y = manufacturer)) +
  geom_bar(fill = 'dodgerblue4') +
  labs(
    y = element_blank(),
    x = 'Number of cars',
    title = 'Number of cars in the {mpg} dataset'
  )
horizontal_bars

# 增大文本
larger_text <- horizontal_bars +
  theme_grey(base_size = 14) +
  theme(plot.title = element_text(size = rel(1.1)))
larger_text

# 删除标签周围多余间距
horizontal_bars_no_spacing <- larger_text +
  scale_x_continuous(expand = expansion(mult = c(0, 0.01)))
horizontal_bars_no_spacing

# 删除杂乱
no_y_grid_plot <- horizontal_bars_no_spacing +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
  )
no_y_grid_plot

# 直接标签
counts_manufacturer <- count(manufacturers, manufacturer)

no_y_grid_plot +
  geom_text(
    data = counts_manufacturer,
    mapping = aes(x = n, y = manufacturer, label = n),
    hjust = 1,
    nudge_x = -0.25,
    color = 'white'
  ) +
  geom_vline(xintercept = 0) +
  scale_x_continuous(breaks = NULL, expand = expansion(mult = c(0, 0.01))) +
  labs(x = element_blank()) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# 将y轴标签纳入条形内
counts_manufacturer <- counts_manufacturer |>
  mutate(
    manufacturer_label = case_when(
      manufacturer == 'Land Rover' ~ 'L. Rover',
      manufacturer == 'Lincoln' ~ 'Linc.',
      T ~ manufacturer
    )
  )
no_y_grid_plot +
  geom_text(
    data = counts_manufacturer,
    mapping = aes(x = n, y = manufacturer, label = n),
    hjust = 1,
    nudge_x = -0.1,
    color = 'white',
    fontface = 'bold',
    size = 4.5
  ) +
  geom_text(
    data = counts_manufacturer,
    mapping = aes(x = 0, y = manufacturer, label = manufacturer_label),
    hjust = 0,
    nudge_x = 0.25,
    color = 'white',
    fontface = 'bold',
    size = 4.5
  ) +
  geom_vline(xintercept = 0) +
  scale_x_continuous(breaks = NULL, expand = expansion(mult = c(0, 0.01))) +
  scale_y_discrete(breaks = NULL) +
  labs(x = element_blank()) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

manufacturers |>
  mutate(manufacturer = fct_infreq(manufacturer) |> fct_rev()) |>
  ggplot(aes(y = manufacturer)) +
  geom_bar(
    just = 1,
    fill = 'dodgerblue4',
    width = 0.4
  ) +
  geom_text(
    data = counts_manufacturer,
    mapping = aes(
      x = n,
      y = manufacturer,
      label = n
    ),
    hjust = 1,
    vjust = 0,
    nudge_y = 0.1,
    color = 'grey30',
    fontface = 'bold',
    size = 5.5
  ) +
  geom_text(
    data = counts_manufacturer,
    mapping = aes(
      x = 0,
      y = manufacturer,
      label = manufacturer_label
    ),
    hjust = 0,
    vjust = 0,
    nudge_y = 0.1,
    nudge_x = 0.05,
    color = 'grey30',
    fontface = 'bold',
    size = 5.5
  ) +
  labs(
    y = element_blank(),
    x = 'Number of cars',
    title = 'Number of cars in the {mpg} dataset'
  ) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.01))) +
  scale_y_discrete(breaks = NULL) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(size = rel(1.1)),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  ) +
  geom_vline(xintercept = 0) +
  scale_x_continuous(breaks = NULL, expand = expansion(mult = c(0, 0.01))) +
  labs(x = element_blank()) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

回到顶部