Graphing Survival and Hazard Functions
Written by Peter Rosenmai on 11 Apr 2014. Last revised 13 Jun 2015.
Here's some R code to graph the basic survival-analysis
functions—s(t), S(t), f(t), F(t), h(t) or H(t)—derived from any of their definitions.
For example:
t <-
seq(0, 4, 0.01)
hazard_fn <-
function(t){2*t}
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"S")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(0,
max(y)), main=
"S(t)", ylab=
"Survival Probability", type=
"l")
Note that I supplied h(t), the hazard function, but I graphed S(t), the survival function derived from it.
I could instead have plotted h(t) itself:
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"h")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(0,
max(y)), main=
"h(t)", ylab=
"Hazard", type=
"l")
Or s(t), the survival event density function:
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"s")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(
min(y), 0), main=
"s(t)", ylab=
"Survival Event Density", type=
"l")
I could have plotted H(t), the cumulative hazard function derived from h(t):
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"H")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(0,
max(y)), main=
"H(t)", ylab=
"Cumulative Hazard", type=
"l")
Or I could have plotted f(t), the density function:
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"f")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(0,
max(y)), main=
"f(t)", ylab=
"Event Density", type=
"l")
And F(t), the cumulative density function:
y <-
apply_survival_function(t, hazard_fn, supplied_fn_type=
"h", fn_type_to_apply=
"F")
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=
c(0,
max(y)), main=
"F(t)", ylab=
"Lifetime Distribution", type=
"l")
If I wanted, I could have plotted them all together. Here, for instance, are the survival-analysis
functions derived from a constant hazard function h(t)=0.05:
plot_survival_functions <-
function(t, fn, supplied_fn_type){
op <-
par(mfrow=c(2,3))
on.exit(
par(op))
fn_type <-
c(
"h",
"f",
"s",
"H",
"F",
"S")
fn_type_ylab <-
c(
"Hazard",
"Event Density",
"Survival Event Density",
"Cumulative Hazard",
"Lifetime Density",
"Survival Probability")
for (i in 1:
length(fn_type)){
required_fn_type <- fn_type[i]
fn_name <-
paste0(required_fn_type,
"(t)")
y <-
apply_survival_function(t, fn, supplied_fn_type, required_fn_type)
if (required_fn_type ==
"s") ylim <-
c(
min(y, na.rm=TRUE), 0)
else ylim <-
c(0,
max(y, na.rm=TRUE))
print(ylim)
plot(x=t, y=y, xlim=
c(0,
max(t)), ylim=ylim, main=fn_name, ylab=fn_type_ylab[i], type=
"l")
}
}
t <-
seq(0, 100, 0.01)
hazard_fn <-
function(t){
rep(0.05,
length(t))}
plot_survival_functions(t, hazard_fn,
"h")
Relationships Between the Basic Survival-Analysis Functions
I've used the following relationships to derive S(t), f(t), F(t), h(t) and H(t) from each other:
The Code
Here's the code you'll need to run the above examples. Note the use of numerical differentiation and
integration.
require(numDeriv)
apply_survival_function <- function(t, fn, supplied_fn_type, fn_type_to_apply){
integrate_from_0 <- function(fn, t){
int_fn <- function(t){
integrate(fn, 0, t)
}
result <- sapply(t, int_fn)
value <- unlist(result["value",])
msg <- unlist(result["message",])
value[which(msg != "OK")] <- NA
return(value)
}
if (supplied_fn_type == "s"){
if (fn_type_to_apply == "s") y <- fn(t)
if (fn_type_to_apply == "S") y <- 1 + integrate_from_0(fn, t)
if (fn_type_to_apply == "f") y <- -fn(t)
if (fn_type_to_apply == "F") y <- -integrate_from_0(fn, t)
if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 + integrate_from_0(fn, t))} , t)
if (fn_type_to_apply == "H") y <- -log(1 + integrate_from_0(fn, t))
}
if (supplied_fn_type == "S"){
if (fn_type_to_apply == "s") y <- -grad(function(t){1 - fn(t)}, t)
if (fn_type_to_apply == "S") y <- fn(t)
if (fn_type_to_apply == "f") y <- grad(function(t){1 - fn(t)}, t)
if (fn_type_to_apply == "F") y <- 1 - fn(t)
if (fn_type_to_apply == "h") y <- grad(function(t){-log(fn(t))}, t)
if (fn_type_to_apply == "H") y <- -log(fn(t))
}
if (supplied_fn_type == "f"){
if (fn_type_to_apply == "s") y <- -fn(t)
if (fn_type_to_apply == "S") y <- 1 - integrate_from_0(fn, t)
if (fn_type_to_apply == "f") y <- fn(t)
if (fn_type_to_apply == "F") y <- integrate_from_0(fn, t)
if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 - integrate_from_0(fn, t))} , t)
if (fn_type_to_apply == "H") y <- -log(1 - integrate_from_0(fn, t))
}
if (supplied_fn_type == "F"){
if (fn_type_to_apply == "s") y <- -grad(fn, t)
if (fn_type_to_apply == "S") y <- 1 - fn(t)
if (fn_type_to_apply == "f") y <- grad(fn, t)
if (fn_type_to_apply == "F") y <- fn(t)
if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 - fn(t))}, t)
if (fn_type_to_apply == "H") y <- -log(1 - fn(t))
}
if (supplied_fn_type == "h"){
if (fn_type_to_apply == "s") y <- -fn(t) * exp(-integrate_from_0(fn, t))
if (fn_type_to_apply == "S") y <- exp(-integrate_from_0(fn, t))
if (fn_type_to_apply == "f") y <- fn(t) * exp(-integrate_from_0(fn, t))
if (fn_type_to_apply == "F") y <- 1 - exp(-integrate_from_0(fn, t))
if (fn_type_to_apply == "h") y <- fn(t)
if (fn_type_to_apply == "H") y <- integrate_from_0(fn, t)
}
if (supplied_fn_type == "H"){
if (fn_type_to_apply == "s") y <- -exp(-fn(t))
if (fn_type_to_apply == "S") y <- exp(-fn(t))
if (fn_type_to_apply == "f") y <- grad(function(t){1 - exp(-fn(t))}, t)
if (fn_type_to_apply == "F") y <- 1 - exp(-fn(t))
if (fn_type_to_apply == "h") y <- grad(fn, t)
if (fn_type_to_apply == "H") y <- fn(t)
}
return(y)
}
Example: Fitting Kaplan Meier and Cox Proportional Hazards Models to Sample Data from Hazard Functions that Perfectly Satisfy the Proportional Hazards Assumption
I use the following code to investigate how well the Cox proportional hazards model fits a dataset that very well satisfies the proportional hazards assumption. My steps:
-
I create a set of hazard functions that perfectly satisfy the proportional hazards assumption. That is, for any two such functions hi and hj, there exists a constant ci,j such that hi(t) = ci,j hj(t) for all t >=0.
-
I use the apply_survival_function(), defined above, to plot the survival curves derived from those hazard functions.
-
For each of the hazard functions, I use F(t), the cumulative density function to get a sample of time-to-event data from the distribution defined by that hazard function.
-
I fit to that data a Kaplan Meier model and a Cox proportional hazards model—and I plot the associated survival curves.
Here are graphs that result when the n_random_points_per_fn parameter is 10:
And when the n_random_points_per_fn parameter is 50:
And when the n_random_points_per_fn parameter is 10,000:
As you can see, as the sample size increases, the Kaplan Meier curves very closely approximate the survival curves from which the sample was drawn. However, the Cox proportional hazards model shows clear bias, the cause of which escapes me.
Here's the code. Don't forget, you'll need to load into your workspace apply_survival_function(), defined above.
require(numDeriv)
require(survival)
n_random_points_per_fn <- 10000
haz_base <- function(t){2 * t + 0.05 * t ^ 2}
mult <- c(0.5, 2, 3.5, 40, 100)
xlim_max <- 4
haz <- list()
for (i in 1:length(mult)) haz[[i]] <- function(t) haz_base(t) * mult[i]
inverse <- function(fn, min_x, max_x){
fn_inv <- function(y){
uniroot((function(x){fn(x) - y}), lower=min_x, upper=max_x)[1]$root
}
return(Vectorize(fn_inv))
}
df_time_to_event <- NULL
for (i in 1:length(mult)){
cdf_inv <- inverse(function(t){apply_survival_function(t, haz[[i]], supplied_fn_type="h", fn_type_to_apply="F")}, 0, 10000)
df_time_to_event_new <- data.frame("time_to_event"=cdf_inv(runif(n_random_points_per_fn)), "function_number"=rep(i, n_random_points_per_fn))
if (is.null(df_time_to_event)) df_time_to_event <- df_time_to_event_new
else df_time_to_event <- rbind(df_time_to_event, df_time_to_event_new)
}
df_time_to_event$event <- 1
old_par <- par(mfcol=c(3, 1))
add <- FALSE
for (i in 1:length(mult)){
s <- function(t){apply_survival_function(t, haz[[i]], supplied_fn_type="h", fn_type_to_apply="S")}
curve(s, from=0, to=xlim_max, col=i, add=add, main="Survival Functions", xaxs="i", xlab="t", ylab="S(t)")
add <- TRUE
}
title <- paste("Kaplan Meier Survival Curves Using", n_random_points_per_fn, "Random Points from Each Survival Function")
plot(survfit(Surv(time_to_event, event) ~ function_number, df_time_to_event), col=1:length(mult), xlim=c(0, xlim_max), main=title, xlab="t", ylab="S(t)")
coxfit <- coxph(Surv(time_to_event, event) ~ function_number, df_time_to_event)
title <- paste("Cox Proportional Hazards Survival Curves Using", n_random_points_per_fn, "Random Points from Each Survival Function")
plot(survfit(coxfit, newdata=data.frame("function_number"=1:length(mult))), col=1:length(mult), xlim=c(0, xlim_max), main=title, xlab="t", ylab="S(t)")
par(old_par)