一、概述
气泡图(Bubble Chart)是一种展示三个或更多变量之间关系的二维图表。它与散点图类似,但是在每个数据点处加上了一个圆圈,圆圈的大小和第三个变量相关。因此气泡图不仅可以用于显示两个变量之间的关系,还可以同时显示第三个变量的大小或重要性,增加了数据展示的可读性和信息量。
二、数据集
下面我将以R语言自带的mtcars数据集为例画出气泡图。
首先,我们需要加载ggplot2包和mtcars数据集,并指定想要用到的变量。
2.1 安装与加载 ggplot2 包
install.packages("ggplot2")
library(ggplot2)
2.2 读取数据
加载数据
# Load the mtcars data set that ships with R and print its structure
data("mtcars")
str(mtcars)
数据集展示
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
三、使用方法
3.1 基础方法
# Basic bubble chart of mtcars: wt on the x axis, mpg on the y axis, and hp
# mapped to the bubble size.
ggplot(mtcars, aes(wt, mpg)) +
  # Semi-transparent bubbles in a single fixed colour, sized by hp
  geom_point(mapping = aes(size = hp), colour = "#0072B2", alpha = 0.7) +
  # Range of bubble sizes used for the smallest and largest hp values
  scale_size_continuous(range = c(2, 12)) +
  # Plot title plus axis and legend labels
  labs(
    title = "wt vs. mpg (by hp)",
    x = "Weight",
    y = "Miles per gallon",
    size = "Horsepower"
  )
3.2 气泡颜色变化
# Bubble chart of mtcars where bubble size encodes hp and bubble colour
# encodes disp.
# Shape 21 is a fillable circle: its interior is drawn with `fill` and only
# its outline with `color`. Mapping disp to `fill` is what colours the whole
# bubble; mapping it to `color` alone would leave hollow rings whose only
# coloured part is the 0.2-wide outline.
ggplot(data = mtcars, aes(x = wt, y = mpg, size = hp, fill = disp)) +
  # Semi-transparent filled bubbles with a thin black outline
  geom_point(alpha = 0.7, shape = 21, stroke = 0.2, color = "black") +
  # Plot title plus axis and legend labels
  labs(
    title = "wt vs. mpg (by hp and disp)",
    x = "Weight",
    y = "Miles per gallon",
    size = "Horsepower",
    fill = "Displacement"
  ) +
  # Range of bubble sizes
  scale_size(range = c(2, 12)) +
  # Continuous fill gradient from low to high disp
  scale_fill_gradient(low = "darkblue", high = "red")
3.3 聚类气泡图
接下来展示论文中常见的聚类气泡图,代码和数据集如下:
install.packages("ggplot2")
install.packages("dplyr")
install.packages("ggrepel")
install.packages("gridExtra")
install.packages("ggalt")
library(ggplot2)
library(dplyr)
library(ggrepel)
library(gridExtra)
library(ggalt)
# Load the built-in iris data set and preview its first rows
data(iris)
head(iris)

# Build the plotting data: standardise the two sepal measurements and assign
# every observation to one of three k-means clusters.
# scale() returns a matrix, so it is converted back to a data frame.
df <- iris %>%
  select(x = Sepal.Length, y = Sepal.Width) %>%
  scale() %>%
  as.data.frame()

# k-means starts from randomly chosen centres; seed the RNG so the cluster
# assignment (and therefore the plot) is reproducible.
set.seed(1)
df$cluster <- kmeans(df[, c("x", "y")], centers = 3)$cluster

# One colour per cluster, shared by the points and the ellipses
cluster_colors <- c("#0072B2", "#D55E00", "#009E73")

# Draw the cluster bubble chart
# Step 1: points coloured by cluster
p1 <- ggplot(df, aes(x = x, y = y, color = factor(cluster))) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = cluster_colors) +
  theme_bw()

# Step 2: a 95% ellipse per cluster. The polygon geom is required for the
# `fill` aesthetic to take effect; the default path geom ignores it.
p2 <- p1 +
  stat_ellipse(
    aes(fill = factor(cluster)),
    geom = "polygon", alpha = 0.2, level = 0.95
  ) +
  scale_fill_manual(values = cluster_colors)

# Step 3: title, and hide the legends
p3 <- p2 +
  ggtitle("Cluster Bubble Plot") +
  theme(legend.position = "none")

grid.arrange(p3, ncol = 1)
带标签的聚类气泡图
# Data preparation: standardise the two sepal measurements of iris.
# scale() returns a matrix; it is kept for clustering and converted to a data
# frame for plotting further down.
scaled <- iris %>%
  select(Sepal.Length, Sepal.Width) %>%
  scale()

# k-means starts from randomly chosen centres; seed the RNG before fitting so
# the cluster assignment is reproducible.
set.seed(1)
km_fit <- kmeans(scaled, centers = 3)

# Plotting data: scaled coordinates renamed to x / y plus the cluster id
df <- as.data.frame(scaled) %>%
  rename(x = Sepal.Length, y = Sepal.Width) %>%
  mutate(cluster = km_fit$cluster)

# Select the mtcars columns used for clustering
cluster_data <- mtcars[, c("mpg", "disp", "hp")]
# Run k-means and store the assignment in a new mtcars column.
# NOTE(review): the plot below is drawn from the iris-based `df`; this mtcars
# clustering is not used by it.
set.seed(1)
mtcars$cluster <- kmeans(cluster_data, centers = 3)$cluster

# One colour per cluster, shared by the points and the ellipses
cluster_colors <- c("#0072B2", "#D55E00", "#009E73")

# Mean position of each cluster, used to place one label per cluster instead
# of one label on every single point
cluster_centers <- aggregate(cbind(x, y) ~ cluster, data = df, FUN = mean)

# Draw the cluster bubble chart
# Step 1: points coloured by cluster
p1 <- ggplot(df, aes(x = x, y = y, color = factor(cluster))) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = cluster_colors) +
  theme_bw()

# Step 2: a 95% ellipse per cluster. The polygon geom is required for the
# `fill` aesthetic to take effect; the default path geom ignores it.
p2 <- p1 +
  stat_ellipse(
    aes(fill = factor(cluster)),
    geom = "polygon", alpha = 0.2, level = 0.95
  ) +
  scale_fill_manual(values = cluster_colors)

# Step 3: cluster labels at the cluster means, title, and hidden legends
p3 <- p2 +
  geom_label_repel(data = cluster_centers, aes(label = cluster), size = 3) +
  ggtitle("Cluster Bubble Plot") +
  theme(legend.position = "none")

grid.arrange(p3, ncol = 1)
✔