Graphing Survival and Hazard Functions

Here's some R code to graph the basic survival-analysis functions—s(t), S(t), f(t), F(t), h(t) or H(t)—derived from any of their definitions.

For example:

# Define time points at which to plot
t <- seq(0, 4, 0.01)

# Define h(t), the hazard function
hazard_fn <- function(t){2*t}

# Graph S(t), the survival function derived from h(t)
y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="S")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(0, max(y)), main="S(t)", ylab="Survival Probability", type="l")
S(t)

Note that I supplied h(t), the hazard function, but I graphed S(t), the survival function derived from it.

I could instead have plotted h(t) itself:

y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="h")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(0, max(y)), main="h(t)", ylab="Hazard", type="l")
h(t)

Or s(t), the survival event density function:

y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="s")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(min(y), 0), main="s(t)", ylab="Survival Event Density", type="l")
s(t)

I could have plotted H(t), the cumulative hazard function derived from h(t):

y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="H")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(0, max(y)), main="H(t)", ylab="Cumulative Hazard", type="l")
H(t)

Or I could have plotted f(t), the density function:

y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="f")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(0, max(y)), main="f(t)", ylab="Event Density", type="l")
f(t)

And F(t), the cumulative density function:

y <- apply_survival_function(t, hazard_fn, supplied_fn_type="h", fn_type_to_apply="F")
plot(x=t, y=y, xlim=c(0, max(t)), ylim=c(0, max(y)), main="F(t)", ylab="Lifetime Distribution", type="l")
F(t)

If I wanted, I could have plotted them all together. Here, for instance, are the survival-analysis functions derived from a constant hazard function h(t)=0.05:

plot_survival_functions <- function(t, fn, supplied_fn_type){
   op <- par(mfrow=c(2,3))
   on.exit(par(op))

   fn_type      <- c("h", "f", "s", "H", "F", "S")
   fn_type_ylab <- c("Hazard", "Event Density", "Survival Event Density", "Cumulative Hazard", "Lifetime Density", "Survival Probability")
   for (i in 1:length(fn_type)){
      required_fn_type <- fn_type[i]
      fn_name <- paste0(required_fn_type, "(t)")
      y <- apply_survival_function(t, fn, supplied_fn_type, required_fn_type)
      if (required_fn_type == "s") ylim <- c(min(y, na.rm=TRUE), 0)
      else                         ylim <- c(0, max(y, na.rm=TRUE))
      print(ylim)
      plot(x=t, y=y, xlim=c(0, max(t)), ylim=ylim, main=fn_name, ylab=fn_type_ylab[i], type="l")
   }
}
t <- seq(0, 100, 0.01)
hazard_fn <- function(t){rep(0.05, length(t))}
plot_survival_functions(t, hazard_fn, "h")
s(t), S(t), f(t), F(t), h(t) and H(t)

Relationships Between the Basic Survival-Analysis Functions

I've used the following relationships to derive S(t), f(t), F(t), h(t) and H(t) from each other:

Survival-analysis equations

The Code

Here's the code you'll need to run the above examples. Note the use of numerical differentiation and integration.

require(numDeriv)

apply_survival_function <- function(t, fn, supplied_fn_type, fn_type_to_apply){
   integrate_from_0 <- function(fn, t){
      int_fn <- function(t){
         integrate(fn, 0, t)
      }

      result <- sapply(t, int_fn)
      value  <- unlist(result["value",])
      msg    <- unlist(result["message",])
      value[which(msg != "OK")] <- NA
      return(value)
   }

   if (supplied_fn_type == "s"){
      if (fn_type_to_apply == "s") y <- fn(t)
      if (fn_type_to_apply == "S") y <- 1 + integrate_from_0(fn, t)
      if (fn_type_to_apply == "f") y <- -fn(t)
      if (fn_type_to_apply == "F") y <- -integrate_from_0(fn, t)
      if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 + integrate_from_0(fn, t))} , t)
      if (fn_type_to_apply == "H") y <- -log(1 + integrate_from_0(fn, t))
   }

   if (supplied_fn_type == "S"){
      if (fn_type_to_apply == "s") y <- -grad(function(t){1 - fn(t)}, t)
      if (fn_type_to_apply == "S") y <- fn(t)
      if (fn_type_to_apply == "f") y <- grad(function(t){1 - fn(t)}, t)
      if (fn_type_to_apply == "F") y <- 1 - fn(t)
      if (fn_type_to_apply == "h") y <- grad(function(t){-log(fn(t))}, t)
      if (fn_type_to_apply == "H") y <- -log(fn(t))
   }

   if (supplied_fn_type == "f"){
      if (fn_type_to_apply == "s") y <- -fn(t)
      if (fn_type_to_apply == "S") y <- 1 - integrate_from_0(fn, t)
      if (fn_type_to_apply == "f") y <- fn(t)
      if (fn_type_to_apply == "F") y <- integrate_from_0(fn, t)
      if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 - integrate_from_0(fn, t))} , t)
      if (fn_type_to_apply == "H") y <- -log(1 - integrate_from_0(fn, t))
   }

   if (supplied_fn_type == "F"){
      if (fn_type_to_apply == "s") y <- -grad(fn, t)
      if (fn_type_to_apply == "S") y <- 1 - fn(t)
      if (fn_type_to_apply == "f") y <- grad(fn, t)
      if (fn_type_to_apply == "F") y <- fn(t)
      if (fn_type_to_apply == "h") y <- grad(function(t){-log(1 - fn(t))}, t)
      if (fn_type_to_apply == "H") y <- -log(1 - fn(t))
   }

   if (supplied_fn_type == "h"){
      if (fn_type_to_apply == "s") y <- -fn(t) * exp(-integrate_from_0(fn, t))
      if (fn_type_to_apply == "S") y <- exp(-integrate_from_0(fn, t))
      if (fn_type_to_apply == "f") y <- fn(t) * exp(-integrate_from_0(fn, t))
      if (fn_type_to_apply == "F") y <- 1 - exp(-integrate_from_0(fn, t))
      if (fn_type_to_apply == "h") y <- fn(t)
      if (fn_type_to_apply == "H") y <- integrate_from_0(fn, t)
   }

   if (supplied_fn_type == "H"){
      if (fn_type_to_apply == "s") y <- -exp(-fn(t))
      if (fn_type_to_apply == "S") y <- exp(-fn(t))
      if (fn_type_to_apply == "f") y <- grad(function(t){1 - exp(-fn(t))}, t)
      if (fn_type_to_apply == "F") y <- 1 - exp(-fn(t))
      if (fn_type_to_apply == "h") y <- grad(fn, t)
      if (fn_type_to_apply == "H") y <- fn(t)
   }

   return(y)
}

Example: Fitting Kaplan Meier and Cox Proportional Hazards Models to Sample Data from Hazard Functions that Perfectly Satisfy the Proportional Hazards Assumption

I use the following code to investigate how well the Cox proportional hazards model fits a dataset that very well satisfies the proportional hazards assumption. My steps:

  1. I create a set of hazard functions that perfectly satisfy the proportional hazards assumption. That is, for any two such functions hi and hj, there exists a constant ci,j such that hi(t) = ci,j hj(t) for all t >=0.

  2. I use the apply_survival_function(), defined above, to plot the survival curves derived from those hazard functions.

  3. For each of the hazard functions, I use F(t), the cumulative density function to get a sample of time-to-event data from the distribution defined by that hazard function.

  4. I fit to that data a Kaplan Meier model and a Cox proportional hazards model—and I plot the associated survival curves.

Here are graphs that result when the n_random_points_per_fn parameter is 10:

Plots of survival functions and of Kaplan Meier and Cox proportional hazards survival curves based on a sample of size 10.

And when the n_random_points_per_fn parameter is 50:

Plots of survival functions and of Kaplan Meier and Cox proportional hazards survival curves based on a sample of size 50.

And when the n_random_points_per_fn parameter is 10,000:

Plots of survival functions and of Kaplan Meier and Cox proportional hazards survival curves based on a sample of size 10,000.

As you can see, as the sample size increases, the Kaplan Meier curves very closely approximate the survival curves from which the sample was drawn. However, the Cox proportional hazards model shows clear bias, the cause of which escapes me.

Here's the code. Don't forget, you'll need to load into your workspace apply_survival_function(), defined above.

require(numDeriv)
require(survival)

#
# Parameters.
#

# The number of time-to-event points to sample from the distribution defined by each hazard function.
n_random_points_per_fn <- 10000

# The base hazard function.
haz_base <- function(t){2 * t + 0.05 * t ^ 2}

# These define the hazard functions as multiples at each time t of haz_base(t).
mult <- c(0.5, 2, 3.5, 40, 100)

# The x-maximum (time) for the survival curves.
xlim_max <- 4

#
# Define haz, a list of hazard functions such that haz[[i]](t) = mult[i] * haz_base(t) for t >= 0.
# These perfectly satisfy the proportional hazards assumption.
#

haz <- list()
for (i in 1:length(mult)) haz[[i]] <- function(t) haz_base(t) * mult[i]

#
# Store in dataframe df_time_to_event random time-to-event values from haz, the list of hazard functions.
#

inverse <- function(fn, min_x, max_x){
   # Returns the inverse of a function for a given range.
   # E.g. inverse(sin, 0, pi/2)(sin(pi/4)) equals pi/4 because 0 <= pi/4 <= pi/2

   fn_inv <- function(y){
      uniroot((function(x){fn(x) - y}), lower=min_x, upper=max_x)[1]$root
   }
   return(Vectorize(fn_inv))
}
df_time_to_event <- NULL
for (i in 1:length(mult)){
   cdf_inv <- inverse(function(t){apply_survival_function(t, haz[[i]], supplied_fn_type="h", fn_type_to_apply="F")}, 0, 10000)
   df_time_to_event_new <- data.frame("time_to_event"=cdf_inv(runif(n_random_points_per_fn)), "function_number"=rep(i, n_random_points_per_fn))
   if (is.null(df_time_to_event)) df_time_to_event <- df_time_to_event_new
   else df_time_to_event <- rbind(df_time_to_event, df_time_to_event_new)
}
df_time_to_event$event <- 1

#
# Plot the survival curves derived from the hazard functions.
#

# Set up 3-in-1 plotting.
old_par <- par(mfcol=c(3, 1))

add <- FALSE
for (i in 1:length(mult)){
   s <- function(t){apply_survival_function(t, haz[[i]], supplied_fn_type="h", fn_type_to_apply="S")}
   curve(s, from=0, to=xlim_max, col=i, add=add, main="Survival Functions", xaxs="i", xlab="t", ylab="S(t)")
   add <- TRUE
}

#
# Plot the random time-to-event values using Kaplan-Meier curves.
#

title <- paste("Kaplan Meier Survival Curves Using", n_random_points_per_fn, "Random Points from Each Survival Function")
plot(survfit(Surv(time_to_event, event) ~ function_number, df_time_to_event), col=1:length(mult), xlim=c(0, xlim_max), main=title, xlab="t", ylab="S(t)")

#
# Plot the random time-to-event values using Cox proportional-hazards fitted models.
#

coxfit <- coxph(Surv(time_to_event, event) ~ function_number, df_time_to_event)
title <- paste("Cox Proportional Hazards Survival Curves Using", n_random_points_per_fn, "Random Points from Each Survival Function")
plot(survfit(coxfit, newdata=data.frame("function_number"=1:length(mult))), col=1:length(mult), xlim=c(0, xlim_max), main=title, xlab="t", ylab="S(t)")

# Reset plotting parameters.
par(old_par)