如何使用ggplot2在Kaplan-Meier图下面放置“风险数”表

我想使用ggplot2创建一个Kaplan-Meier图,其下面有一个危险数字表,表示每个时间点每个组的风险数(即x轴刻度).风险数量应与相应的价格对齐.左边的风险数字表应该是行名,表示有风险的数字所属的组.

我写了下面的例子.我学习如何确定这个question的风险数字.但是,我不知道如何在Kaplan-Meier情节下面的风险表中创建一个漂亮的,良好对齐的数字.朋友帮助我创建了以下示例中的风险表数量.但是,我的例子的结果是不够的.

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
     theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=1.5, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
df_nums$year = 1:6
tbl = ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable,label=value)) +
     geom_text(size = 3.5) + theme(panel.grid.major = element_blank(), legend.position = "none") +      theme_bw() + 
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + scale_y_discrete(breaks=c("Group.B","Group.A"), labels=c("Number at Risk\nGroup B", "Group A"))

Layout <- grid.layout(nrow = 2, ncol = 1, heights = unit(c(2, 0.55), c("null", "null")))
 grid.show.layout(Layout)
 vplayout <- function(...) {
    grid.newpage()
    pushViewport(viewport(layout = Layout))
}

subplot <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
mmplot <- function(a, b) {
     vplayout()
     print(a, vp = subplot(1, 1))
     print(b, vp = subplot(2, 1))
 }

 dev.new()
mmplot(g, tbl)

《如何使用ggplot2在Kaplan-Meier图下面放置“风险数”表》

更新#1

正如所建议的那样,我使用gtable来得到数字.我对变体a(来自baptiste的示例代码)的布局不满意,所以我尝试了其他的东西.但是,版本B确实有另一个缺点:标签位于主图的绘图层的x维度内.

a)如何创建具有良好对齐风险数字的合理布局数字.

b)此外,如何在主要情节和桌子之间放置“有风险的数字”的标题?标题“有风险的数字”应与tbl的“A组”和“B组”标签的左端对齐.

c)tbl中风险数字的字体大小以及相应的标签“A组”和“B组”应与主图中的刻度标签相同.我怎样才能做到这一点?

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
#           theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=4, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
df_nums$year = 1:6
str(df_nums)

tbl <- ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable, label=value)) +
     geom_text() +
#           theme_bw() + 
     theme(
          panel.grid.major = element_blank(), 
          legend.position = "none",
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + 
     scale_y_discrete(breaks=c("Group.B","Group.A"), labels=c("Group B", "Group A"))

library(gtable)

# Version A
both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
grid.newpage()
grid.draw(both)

# Version B
a <- gtable(unit(15, c("cm")), unit(c(10,3), "cm"))
a <- gtable_add_grob(a, ggplotGrob(g), 1, 1)
a <- gtable_add_grob(a, ggplotGrob(tbl), 2, 1)
grid.newpage()
grid.draw(a)

版本#1(风险数字与主要情节的x轴刻度很好地对齐但布局不好

《如何使用ggplot2在Kaplan-Meier图下面放置“风险数”表》

版本#2(拧紧的对齐但更好的布局)

《如何使用ggplot2在Kaplan-Meier图下面放置“风险数”表》

更新#2

现在它几乎是完美的.两件小事:

a)如何在图中添加标题(用GIMP完成)“风险数”,如下图所示?

b)为什么B组在A组以上的表中?组A的df_nums中的标签为1,组B组中的标签为2.如何在风险表中的组中将组A设置为B组以上?

> str(df_nums$variable)
 Factor w/ 2 levels "Group.A","Group.B": 1 1 1 1 1 1 2 2 2 2 ...

这里更新的代码:

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
#           theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=4, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
str(df_nums$variable)
df_nums
df_nums$year = 1:6
str(df_nums)

tbl <- ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable, label=value)) +
     geom_text() +
#           theme_bw() + 
     theme(
          panel.grid.major = element_blank(), 
          legend.position = "none",
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size=15, face="bold", color = 'black'),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + 
     scale_y_discrete(breaks=c("Group.A", "Group.B"), labels=c("Group A", "Group B"))

library(gtable)

# Version C
both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
panels <- both$layout$t[grep("panel", both$layout$name)]
both$heights[panels] <- list(unit(1,"null"), unit(2, "lines"))
grid.newpage()
grid.draw(both)

《如何使用ggplot2在Kaplan-Meier图下面放置“风险数”表》

最佳答案 你能做到的

both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
panels <- both$layout$t[grep("panel", both$layout$name)]
both$heights[panels] <- list(unit(1,"null"), unit(2, "lines"))
both <- gtable_add_rows(both, heights = unit(1,"line"), 8)
both <- gtable_add_grob(both, 
                        textGrob("Number at risk", hjust=0, x=0), 
                        t=9, l=2, r=4)
grid.newpage()
grid.draw(both)
点赞