ggplot2分组箱线图添加均值
ggplot2分组箱线图添加均值
一、不分组的简单箱线图加均值
代码如下(示例):
# Library
library(ggplot2)
# Simulated data: four groups (A-D) of unequal size, each filled with integers
# sampled with replacement from a group-specific range. The values are random,
# so the figure differs between runs.
names <- c(rep("A", 20), rep("B", 8), rep("C", 30), rep("D", 80))
value <- c(
  sample(2:5, 20, replace = TRUE),
  sample(4:10, 8, replace = TRUE),
  sample(1:7, 30, replace = TRUE),
  sample(3:8, 80, replace = TRUE)
)
data <- data.frame(names, value)

# One box per group with the group mean overlaid as a red point. The legend is
# hidden because the fill colour encodes the same variable as the x axis.
p <- ggplot(data, aes(x = names, y = value, fill = names)) +
  geom_boxplot(alpha = 0.7) +
  stat_summary(
    fun = "mean", geom = "point", shape = 20, size = 2.5,
    color = "red", fill = "red", alpha = 0.7
  ) +
  theme(legend.position = "none") +
  scale_fill_brewer(palette = "Accent")
p
stat_summary
用到了stat_summary函数算均值,具体介绍一下
代码如下(示例):(图片略,直接跑可以跑出来)
library(tidyverse)
library(gapminder)
#----------------------
# Mean life expectancy per year, drawn as points and joined by a line.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(geom = "point", fun = mean) +
  stat_summary(geom = "line", fun = mean)
#----------------------
# Median life expectancy per year, drawn as bars.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(geom = "bar", fun = median)
#----------------------
# Mean life expectancy per year as a translucent filled area, with the yearly
# means marked as points on top. `year` is coerced to integer before plotting.
ggplot(
  mutate(gapminder, year = as.integer(year)),
  aes(year, lifeExp)
) +
  stat_summary(geom = "area", fun = mean, alpha = .5, fill = "#EB5286") +
  stat_summary(geom = "point", fun = mean, color = "#6F213F")
#----------------------
# Per year: a point at the mean with a vertical range running from the
# minimum to the maximum life expectancy.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(
    geom = "pointrange",
    fun = mean,        # the point
    fun.min = "min",   # lower end of the range
    fun.max = "max"    # upper end of the range
  )
#----------------------
# Per year: classic error bars spanning the minimum to the maximum life
# expectancy. `fun` is left unset here; only the two range ends are supplied.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(
    geom = "errorbar",
    fun.min = "min",
    fun.max = "max",
    width = 1
  )
#----------------------
# Per year: a point at the mean with a range of mean +/- one standard deviation,
# spelled out by hand with fun / fun.min / fun.max.
gapminder %>%
  ggplot(aes(x = year, y = lifeExp)) +
  stat_summary(
    # The name must match the function exactly: a stray space inside the
    # string (" mean") makes the function lookup fail.
    fun = "mean",
    geom = "pointrange",
    fun.max = function(x) mean(x) + sd(x),
    fun.min = function(x) mean(x) - sd(x)
  )
# `mean_sd` is provided by the ggpubr package, which is not attached anywhere
# in the visible script. Passing it as `ggpubr::mean_sd` (instead of the string
# "mean_sd") resolves the function without needing library(ggpubr).
gapminder %>%
  ggplot(aes(x = year, y = lifeExp)) +
  stat_summary(fun.data = ggpubr::mean_sd, geom = "pointrange")
# Per the original note, this draws the same thing as a pointrange built by
# hand with fun = mean, fun.min = mean - sd and fun.max = mean + sd.

# Bars at the yearly mean with red mean +/- sd error bars on top.
gapminder %>%
  ggplot(aes(x = year, y = lifeExp)) +
  stat_summary(fun = "mean", geom = "bar") +
  stat_summary(fun.data = ggpubr::mean_sd, geom = "errorbar", color = "red")
#----------------------
# Standard error of the mean (not the standard deviation): the range is
# mean +/- sd(x) / sqrt(n), where n is the number of observations in the year.
gapminder %>%
  ggplot(aes(x = year, y = lifeExp)) +
  stat_summary(
    fun = "mean",
    geom = "pointrange",
    fun.max = function(x) mean(x) + sd(x) / sqrt(length(x)),
    fun.min = function(x) mean(x) - sd(x) / sqrt(length(x))
  )
#----------------------
library(Hmisc)
# Yearly mean with a confidence interval from "mean_cl_normal"; `conf.int` is
# forwarded through `fun.args` to request a 99% interval.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(
    fun.args = list(conf.int = .99),
    fun.data = "mean_cl_normal"
  )
# Confidence-interval error bars per year, with the yearly mean added as a
# point in a second layer.
ggplot(gapminder, aes(year, lifeExp)) +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_cl_normal",
    width = .4
  ) +
  stat_summary(geom = "point", fun = mean)
#----------------------
# Mean plus confidence interval: for the year 2007 only, one bar per continent
# at the mean life expectancy, a point at the mean, and CI error bars.
ggplot(
  filter(gapminder, year == 2007),
  aes(continent, lifeExp)
) +
  stat_summary(geom = "bar", fun = mean, alpha = .7) +
  stat_summary(geom = "point", fun = mean, size = 1) +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_cl_normal",
    width = .2
  )
#----------------------
#用mean_cl_boot对mpg进行运算,返回一个包含y(均值)、ymin和ymax(自助法置信区间的下限和上限)三列的数据框
#----------------------
#position = position_dodge( )并排显示多个条形图
#第一幅图的绘图命令中用到了参数position="dodge",第二幅图的绘图命令中用到的参数是position=position_dodge()。
#这是因为position="dodge"是参数默认为0.9的position_dodge()的简写。当我们需要单独指定该参数的时候,必须输入完整的命令。
# Manual fill palette: 33 hex colours, used in order by scale_fill_manual().
colors <- c(
  "#E41A1C", "#1E90FF", "#FF8C00",
  "#4DAF4A", "#984EA3", "#40E0D0",
  "#FFC0CB", "#00BFFF", "#FFDEAD",
  "#90EE90", "#EE82EE", "#00FFFF",
  "#F0A3FF", "#0075DC", "#993F00",
  "#4C005C", "#2BCE48", "#FFCC99",
  "#808080", "#94FFB5", "#8F7C00",
  "#9DCC00", "#C20088", "#003380",
  "#FFA405", "#FFA8BB", "#426600",
  "#FF0010", "#5EF1F2", "#00998F",
  "#740AFF", "#990000", "#FFFF00"
)
# Mean life expectancy per continent, split into side-by-side bars by year.
# `year` becomes a factor so it can drive a discrete fill. All three layers
# share position_dodge(0.95) so bars, points and error bars line up.
ggplot(
  mutate(gapminder, year = as.factor(year)),
  aes(continent, lifeExp, fill = year)
) +
  stat_summary(
    geom = "bar", fun = mean,
    position = position_dodge(0.95),  # spacing between dodged bars
    alpha = .7
  ) +
  stat_summary(
    geom = "point", fun = mean,
    position = position_dodge(0.95),
    size = 1
  ) +
  stat_summary(
    geom = "errorbar", fun.data = "mean_cl_normal",
    position = position_dodge(0.95),
    width = .2
  ) +
  scale_fill_manual(values = colors) +  # custom fill colours from `colors`
  theme_minimal() +  # original note: removes the dark panel background
  scale_y_continuous(expand = c(0, 5)) +
  scale_x_discrete(expand = c(0.2, 0))
#scale_y_continuous(连续型)或scale_x_discrete(离散型)中的expand参数用于调整坐标轴两端的留白
参考: 通过ggplot2中stat_summary函数快速进行数据统计.
以及 stat_summary.
分组箱线图
所需数据格式(长数据)
# Grouped box plots from the long-format data frame `baidata` (defined outside
# this snippet): one box per 组别, one facet panel per 状态 level. The original
# note describes the panels as baseline / day 7 / day 14.
p1 <- ggplot(baidata, aes(x = 组别, y = 值, fill = 组别)) +
  geom_boxplot(
    color = "azure4",
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 1,
    outlier.alpha = 0  # fully transparent, i.e. outliers are not shown
    # notches left disabled by the author: notch = TRUE, notchwidth = 0.8
  ) +
  facet_wrap(~状态) +
  scale_fill_manual(values = c("lightgoldenrod1", "lavender")) +
  labs(y = "白细胞") +
  ggtitle("不同组别不同时间点白细胞变化") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 10),  # centred, small title
    text = element_text(family = "wqy-microhei")
  )

# Show the plain box plots first, then the same plot with each group's mean
# overlaid as a semi-transparent red point.
p1
p1 + stat_summary(
  geom = "point", fun = "mean",
  color = "red", alpha = 0.5, size = 1
)