Fitting an Exponential Curve to a Stepwise Survival Curve

Let's fit a function of the form f(t) = exp(λt) to a stepwise survival curve (e.g. a Kaplan Meier curve).

Here's the stepwise survival curve we'll be using in this demonstration:

# The data that describes the stepwise survival curve.
surv_time <- c(0.2,  0.5, 1.2,   2,   5,  6.5,    9,   15, 17.3,   35,   80,  200)
surv_prob <- c(  1, 0.94, 0.9, 0.8, 0.6, 0.57, 0.41,  0.2, 0.18, 0.07, 0.02, 0.01)

# Plot the curve.
plot_stepwise_surv_fn <- function(time, prob, col="black"){
   time <- c(0, rep(time, each=2))
   prob <- c(1, 1, rep(prob, each=2)[1:(2 * length(prob) - 1)])
   plot(x=time, y=prob, ylim=c(0, 1), type="l", col=col, xlab="Time", ylab="Probability")
}
plot_stepwise_surv_fn(surv_time, surv_prob)
A stepwise survival curve.

We get two estimates for λ:

fit <- fit_exp_to_stepwise_survival(surv_time, surv_prob)
print(fit)
lambda_method_1 lambda_method_2
    -0.07377279     -0.07375238

Those estimates are very similar, so let's just add the exponential curve defined by one of them to the curve shown above:

curve(expr=sapply(x, function(x){exp(fit["lambda_method_2"] * x)}), from=0, to=max(surv_time), add=TRUE, col="red")
A stepwise survival curve and an exponential curve fitted to it.

As you can see, the curve corresponding to the λ estimate appears to fit the stepwise survival curve quite well.

Here's the function that does the work:

fit_exp_to_stepwise_survival <- function(surv_time, surv_prob, min_lambda=-10, method_1_subs=1000){
   #
   # Fits a curve of the form y = exp(lambda * t) to the stepwise survival curve defined by surv_time and surv_prob.
   #
   # Two methods are used for the curve fitting. Method 1 uses numerical integration to find the lambda that minimises
   # the area between the stepwise and fitted curve. Method 2 is a bit cleverer. It makes use of the fact that the
   # exponential and stepwise functions are easy to integrate.
   #
   # min_lambda is the minimum value that lambda can take in the fitted curve.
   #
   # method_1_subs is the the maximum number of integration subintervals used in method 1
   #
   # Returns a vector with two values:
   # lambda_method_1: The lambda for the fitted curve from the first method.
   # lambda_method_2: The lambda for the fitted curve from the second method.
   #

   # Check the parameters.
   if (length(surv_time) != length(surv_prob)) stop("surv_time and surv_prob must be the same length.")
   if (min_lambda >= 0) stop("min_lambda must be less than zero.")

   # The code that follows assumes that surv_time and surv_prob are sorted by surv_time.
   ord <- order(surv_time)
   surv_time <- surv_time[ord]
   surv_prob <- surv_prob[ord]

   # The negative exponential function that we'll be fitting to the stepwise survival curve.
   neg_exp <- function(lambda, t) exp(-lambda * t)

   #
   # METHOD 1
   #
   stepwise_surv_prob <- function(t){
      # A stepwise survival function based on surv_time and surv_prob.
      prob <- function(t_i){
         if (t_i < 0 || max(surv_time) < t_i) return(NA)
         if (t_i < min(surv_time)) return(1)
         return(surv_prob[max(which(surv_time <= t_i))])
      }
      sapply(t, prob)
   }
   fn_dist1 <- function(lambda) integrate(function(t) abs(neg_exp(lambda, t) - stepwise_surv_prob(t)), lower=0, upper=max(surv_time), subdivisions=method_1_subs)$value
   lambda_v1 <- optimise(f=fn_dist1, interval=c(0, -min_lambda), maximum=FALSE)$minimum

   #
   # METHOD 2
   #
   fn_dist2 <- function(lambda){
      # Create df_calc, the dataframe we'll be using for our calculation of curve areas. "a" ("b") is interval start (end).
      df_calc <- data.frame("a"=c(0, surv_time[1:(length(surv_time) - 1)]), "b"=surv_time, "prob"=c(1, surv_prob[1:(length(surv_time) - 1)]))
      df_calc$intercept <- -log(df_calc$prob) / lambda
      df_calc$polarity <- (df_calc$intercept < df_calc$b) - (df_calc$intercept > df_calc$a)
      for (i in rev(which(df_calc$polarity == 0))){
         # Split in two an interval for which the stepwise survival and the exponential curves intersect.
         df_calc2 <- NULL
         if (i > 1) df_calc2 <- df_calc[1:(i-1),]
         df_calc2 <- rbind(df_calc2, data.frame("a"=df_calc$a[i]        , "b"=df_calc$intercept[i], "prob"=df_calc$prob[i], "intercept"=df_calc$intercept[i], "polarity"=-1))
         df_calc2 <- rbind(df_calc2, data.frame("a"=df_calc$intercept[i], "b"=df_calc$b[i]        , "prob"=df_calc$prob[i], "intercept"=df_calc$intercept[i], "polarity"= 1))
         if (i < nrow(df_calc)) df_calc2 <- rbind(df_calc2, df_calc[(i+1):nrow(df_calc),])
         df_calc <- df_calc2
      }

      # Do the calculations.
      n <- nrow(df_calc)
      df_calc$step_area <- df_calc$prob * (df_calc$b - df_calc$a)
      exp_sum <- df_calc$polarity[n] * neg_exp(lambda, df_calc$b[n]) - df_calc$polarity[1] * neg_exp(lambda, df_calc$a[1])
      if (n > 1){
         pol <- df_calc$polarity
         exp_sum <- exp_sum + sum((pol[1:(n-1)] != pol[2:n]) * pol[1:(n-1)] * 2 * neg_exp(lambda, df_calc$b[1:(n-1)]))
      }
      return(sum(df_calc$polarity * df_calc$step_area) + exp_sum / lambda)
   }
  lambda_v2 <- optimise(f=fn_dist2, interval=c(0, -min_lambda), maximum=FALSE)$minimum

  # Return the results from methods 1 and 2.
  return(c("lambda_method_1"=-lambda_v1, "lambda_method_2"=-lambda_v2))
}