Extend compare_psm_likes() to cover splines models

Merck · May 27, 2024 · ae42bd6 · ae42bd6
1 parent 2cc1420
commit ae42bd6
Show file tree

Hide file tree

Showing 2 changed files with 212 additions and 32 deletions.
diff --git a/R/likepsm.R b/R/likepsm.R
@@ -1,3 +1,28 @@
+#  Copyright (c) 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
+#  All rights reserved.
+#
+#  This file is part of the psm3mkv program.
+#
+#  psm3mkv is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# ==================================================================
+# Functions relating to comparing likelihoods of parametric and splines PSMs
+# - compare_psm_likes()
+# - compare_psm_likes_par()
+# - compare_psm_likes_spl()
+# ==================================================================
+
 #' Compare likelihoods of PSMs
 #' 
 #' Compare the total log-likelihood values for the patient-level dataset after fitting PSM-simple and PSM-complex models to each combination of endpoint distributions
@@ -6,66 +31,75 @@
 #' @importFrom rlang .data
 #' @return List containing
 #' - `res`: Dataset of calculation results for each model
-#' - `ind_aic`: Set of statistical distributions for TTP, PFS and OS which individually fit each endpoint with the best (lowest) AIC
-#' - `ind_bic`: Set of statistical distributions for TTP, PFS and OS which individually fit each endpoint with the best (lowest) BIC
-#' - `jt_aic`:  Set of statistical distributions for TTP, PFS and OS which overall fit a PSM with the best (lowest) AIC
-#' - `jt_bic`:  Set of statistical distributions for TTP, PFS and OS which overall fit a PSM with the best (lowest) BIC
+#' - `best`: Tibble indicating which is the best fitting model individually or jointly, to each endpoint, according to AIC or BIC
 #' @export
 #' @examples
 #' # Fit parametric distributions to a dataset
 #' bosonc <- create_dummydata("flexbosms")
 #' parfits <- fit_ends_mods_par(bosonc)
+#' splfits <- fit_ends_mods_spl(bosonc)
 #' # Present comparison of likelihood calculations
 #' compare_psm_likes(bosonc, parfits)
+#' # compare_psm_likes(bosonc, splfits)
 compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
+  # Determine whether fit is a spline, then call the right function
+  if (fitslist$pfs[[1]]$result$dlist$name=="survspline") {
+    compare_psm_likes_spl(ptdata, fitslist, cuttime)
+  } else {
+    compare_psm_likes_par(ptdata, fitslist, cuttime)
+  }
+}
+
+# Compare likelihoods of PSMs for parametric models
+compare_psm_likes_par <- function(ptdata, fitslist, cuttime=0) {
   # Check that fitslist is a list of 6 endpoints
   if (length(fitslist)!=6) {stop("The list provided to fitslist must contain all 6 endpoints")}
   # Create local variables
   eps <- ndists <- aic_indbest <- bic_indbest <- bests <- res <- thisfit <- aic_jtbest <- bic_jtbest <- NULL
   ll <- rank_aic <- ttp_meth <- pfs_dist <- os_dist <- rank_bic <- NULL
-  # TTP, PFS and OS are endpoints 1, 3 and 4
   eps <- c(1, 3, 4)
-  # Number of distributions for each endpoint
-  ndists <- eps |>
-    purrr::map_vec(~length(fitslist[[.x]]))
-  # Best fits for each endpoint - AIC
-  aic_indbest <- eps |>
-    purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="aic")$fit$dlist$name)
-  # Best fits for each endpoint - BIC
-  bic_indbest <- eps |>
-    purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="bic")$fit$dlist$name)
-  # Join as a tibble
-  bests <- rbind(aic_indbest, bic_indbest)
+  # Best fits by AIC or BIC
+  fits_aic <- eps |>
+    purrr::map(~find_bestfit(fitslist[[.x]], crit="aic")$fit)
+  fits_bic <- eps |>
+    purrr::map(~find_bestfit(fitslist[[.x]], crit="bic")$fit)
+  fits_all <- c(fits_aic, fits_bic)
+  # Pull out names/distributions
+  fits_names <- seq(2*length(eps)) |>
+    purrr::map_vec(~fits_all[[.x]]$dlist$name)
+  # Summarize parametric results in a table
   bests <- tibble::tibble(
-    ttp_meth = bests[,1],
-    pfs_dist = bests[,2],
-    os_dist = bests[,3],
+    ttp_meth = fits_names[c(1, 4)],
+    pfs_dist = fits_names[c(2, 5)],
+    os_dist = fits_names[c(3, 6)],
     meth = "ind",
     ic = c("aic", "bic")
   )
   # Create results table for each model combination
+  ndists <- eps |>
+    purrr::map_vec(~length(fitslist[[.x]]))
   res <- tibble::tibble(
     id = 1:(ndists[3]*ndists[2]*(ndists[1]+1)),
     ttp_meth = NA,
     pfs_dist = NA,
     os_dist = NA,
     ll = NA,
     npar = NA,
-    npts = fitslist$os[[1]]$result$N
+    npts = NA
   )
   # Create a safe calculation of the PSM-simple likelihood (returns NA on error)
   slike_simple <- purrr::possibly(
     ~calc_likes_psm_simple(
       ptdata=ptdata,
       dpam=.x,
-      cuttime=cuttime)$ll[2],
+      cuttime=cuttime),
     otherwise = NA)
   # Create a safe calculation of the PSM-complex likelihood (returns NA on error)
   slike_complex <- purrr::possibly(
     ~calc_likes_psm_complex(
       ptdata=ptdata,
       dpam=.x,
-      cuttime=cuttime)$ll[2],
+      cuttime=cuttime),
     otherwise = NA)
   # Compute results for PSM-simple models
   message("Calculating PSM simple")
@@ -78,8 +112,12 @@ compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
       res$ttp_meth[resrow] <- "simple"
       res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
       res$os_dist[resrow] <- thisfit$os$dlist$name
-      res$ll[resrow] <- slike_simple(thisfit)
-      res$npar[resrow] <- thisfit$pfs$npars + thisfit$os$npars + 1
+      psmsimple <- slike_simple(thisfit)
+      if (is.na(psmsimple)[1]==FALSE) {
+        res$ll[resrow] <- psmsimple$ll[2]
+        res$npar[resrow] <- thisfit$pfs$npars + thisfit$os$npars + 1
+        res$npts[resrow] <- psmsimple$npts[2]
+      }
     }
   }
   # Compute results for PSM-complex models
@@ -95,8 +133,12 @@ compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
         res$ttp_meth[resrow] <- thisfit$ttp$dlist$name
         res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
         res$os_dist[resrow] <- thisfit$os$dlist$name
-        res$ll[resrow] <- slike_complex(thisfit)
-        res$npar[resrow] <- thisfit$ttp$npars + thisfit$pfs$npars + thisfit$os$npars
+        psmcomplex <- slike_complex(thisfit)
+        if (is.na(psmcomplex)[1]==FALSE) {
+          res$ll[resrow] <- psmcomplex$ll[2]
+          res$npar[resrow] <- thisfit$ttp$npars + thisfit$pfs$npars + thisfit$os$npars
+          res$npts[resrow] <- psmcomplex$npts[2]
+        }
       }
     }
   }
@@ -114,8 +156,8 @@ compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
       best_bic = 0
     )
   # Identify best AIC and best BIC model
-  res$best_aic[res$ttp_meth==aic_indbest[1] & res$pfs_dist==aic_indbest[2] & res$os_dist==aic_indbest[3]] <- 1
-  res$best_bic[res$ttp_meth==bic_indbest[1] & res$pfs_dist==bic_indbest[2] & res$os_dist==bic_indbest[3]] <- 1
+  res$best_aic[res$ttp_meth==bests$ttp_meth[1] & res$pfs_dist==bests$pfs_dist[1] & res$os_dist==bests$os_dist[1]] <- 1
+  res$best_bic[res$ttp_meth==bests$ttp_meth[2] & res$pfs_dist==bests$pfs_dist[2] & res$os_dist==bests$os_dist[2]] <- 1
   # Identify best distributions for overall AIC and BIC
   aic_jtbest <- res |>
     dplyr::filter(rank_aic==1) |>
@@ -124,7 +166,146 @@ compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
   bic_jtbest <- res |>
     dplyr::filter(rank_bic==1) |>
     dplyr::select(ttp_meth, pfs_dist, os_dist) |>
+    dplyr::mutate(meth="joint", ic="bic")
+  # Join together
+  bests <- bests |>
+    tibble::add_row(aic_jtbest) |>
+    tibble::add_row(bic_jtbest)
+  # Return
+  return(list(results=res, bests=bests))
+}
+
+# Compare likelihoods of PSMs for spline models
+compare_psm_likes_spl <- function(ptdata, fitslist, cuttime=0) {
+  # Check that fitslist is a list of 6 endpoints
+  if (length(fitslist)!=6) {stop("The list provided to fitslist must contain all 6 endpoints")}
+  # Create local variables
+  eps <- ndists <- aic_indbest <- bic_indbest <- bests <- res <- thisfit <- aic_jtbest <- bic_jtbest <- NULL
+  ll <- rank_aic <- ttp_meth <- pfs_dist <- os_dist <- rank_bic <- NULL
+  # Best fits by AIC or BIC
+  eps <- c(1, 3, 4)
+  fits_aic <- eps |>
+    purrr::map(~find_bestfit(fitslist[[.x]], crit="aic")$fit)
+  fits_bic <- eps |>
+    purrr::map(~find_bestfit(fitslist[[.x]], crit="bic")$fit)
+  fits_all <- c(fits_aic, fits_bic)
+  # Pull out scales and knots
+  fits_scales <- seq(2*length(eps)) |>
+    purrr::map_vec(~fits_all[[.x]]$aux$scale)
+  fits_knots <- seq(2*length(eps)) |>
+    purrr::map_vec(~length(fits_all[[.x]]$aux$knots))
+  # Summarize parametric results in a table
+  bests <- tibble::tibble(
+    ttp_meth = fits_scales[c(1, 4)],
+    ttp_knots = fits_knots[c(1, 4)],
+    pfs_scales = fits_scales[c(2, 5)],
+    pfs_knots = fits_knots[c(2, 5)],
+    os_scales = fits_scales[c(3, 6)],
+    os_knots = fits_knots[c(3, 6)],
+    meth = "ind",
+    ic = c("aic", "bic")
+  )
+  # Create results table for each model combination
+  ndists <- eps |>
+    purrr::map_vec(~length(fitslist[[.x]]))
+  res <- tibble::tibble(
+    id = 1:(ndists[3]*ndists[2]*(ndists[1]+1)),
+    ttp_meth = NA,
+    ttp_knots = NA,
+    pfs_scales = NA,
+    pfs_knots = NA,
+    os_scales = NA,
+    os_knots = NA,
+    ll = NA,
+    npar = NA,
+    npts = NA
+  )
+  # Create a safe calculation of the PSM-simple likelihood (returns NA on error)
+  slike_simple <- purrr::possibly(
+    ~calc_likes_psm_simple(
+      ptdata=ptdata,
+      dpam=.x,
+      cuttime=cuttime),
+    otherwise = NA)
+  # Create a safe calculation of the PSM-complex likelihood (returns NA on error)
+  slike_complex <- purrr::possibly(
+    ~calc_likes_psm_complex(
+      ptdata=ptdata,
+      dpam=.x,
+      cuttime=cuttime),
+    otherwise = NA)
+  # Compute results for PSM-simple models
+  message("Calculating PSM simple")
+  thisfit <- list(ttp=NA, pfs=NA, os=NA)
+  for (p in 1:ndists[2]) {
+    thisfit$pfs <- fitslist$pfs[[p]]$result
+    for (o in 1:ndists[3]) {
+      thisfit$os <- fitslist$os[[o]]$result
+      resrow <- (p-1)*ndists[3] + o
+      res$ttp_meth[resrow] <- "simple"
+      res$ttp_knots[resrow] <- 0
+      res$pfs_scales[resrow] <- thisfit$pfs$aux$scale
+      res$pfs_knots[resrow] <- length(thisfit$pfs$aux$knots)
+      res$os_scales[resrow] <- thisfit$os$aux$scale
+      res$os_knots[resrow] <- length(thisfit$os$aux$knots)
+      psmsimple <- slike_simple(thisfit)
+      if (is.na(psmsimple)[1]==FALSE) {
+        res$ll[resrow] <- psmsimple$ll[2]
+        res$npar[resrow] <- thisfit$pfs$npars + thisfit$os$npars + 1
+        res$npts[resrow] <- psmsimple$npts[2]
+      }
+    }
+  }
+  # Compute results for PSM-complex models
+  message("Calculating PSM complex")
+  thisfit <- list(ttp=NA, pfs=NA, os=NA)
+  for (t in 1:ndists[1]) {
+    thisfit$ttp <- fitslist$ttp[[t]]$result
+    for (p in 1:ndists[2]) {
+      thisfit$pfs <- fitslist$pfs[[p]]$result
+      for (o in 1:ndists[3]) {
+        thisfit$os <- fitslist$os[[o]]$result
+        resrow <- t*ndists[3]*ndists[2] + (p-1)*ndists[3] + o
+        res$ttp_meth[resrow] <- thisfit$ttp$aux$scale
+        res$ttp_knots[resrow] <- length(thisfit$ttp$aux$knots)
+        res$pfs_scales[resrow] <- thisfit$pfs$aux$scale
+        res$pfs_knots[resrow] <- length(thisfit$pfs$aux$knots)
+        res$os_scales[resrow] <- thisfit$os$aux$scale
+        res$os_knots[resrow] <- length(thisfit$os$aux$knots)
+        psmcomplex <- slike_complex(thisfit)
+        if (is.na(psmcomplex)[1]==FALSE) {
+          res$ll[resrow] <- psmcomplex$ll[2]
+          res$npar[resrow] <- thisfit$ttp$npars + thisfit$pfs$npars + thisfit$os$npars
+          res$npts[resrow] <- psmcomplex$npts[2]
+        }
+      }
+    }
+  }
+  # Set log-likelihood values to NA if if cannot be calculated (=-Inf)
+  res$ll[res$ll==-Inf] <- NA
+  # Add AIC and BIC, with ranks
+  message("Wrapping up")
+  res <- res |>
+    dplyr::mutate(
+      aic = 2*.data$npar-2*ll,
+      bic = .data$npar*log(.data$npts)-2*ll,
+      rank_aic = rank(.data$aic),
+      rank_bic = rank(.data$bic),
+      best_aic = 0,
+      best_bic = 0
+    )
+  # Identify best AIC and best BIC model
+  res$best_aic[res$ttp_meth==bests$ttp_meth[1] & res$ttp_knots==bests$ttp_knots[1] & res$pfs_scales==bests$pfs_scales[1] & res$pfs_knots==bests$pfs_knots[1] & res$os_scales==bests$os_scales[1] & res$os_knots==bests$os_knots[1]] <- 1
+  res$best_bic[res$ttp_meth==bests$ttp_meth[2] & res$ttp_knots==bests$ttp_knots[2] & res$pfs_scales==bests$pfs_scales[2] & res$pfs_knots==bests$pfs_knots[2] & res$os_scales==bests$os_scales[2] & res$os_knots==bests$os_knots[2]] <- 1
+  # Identify best distributions for overall AIC and BIC
+  aic_jtbest <- res |>
+    dplyr::filter(rank_aic==1) |>
+    dplyr::select(ttp_meth, ttp_knots, pfs_scales, pfs_knots, os_scales, os_knots) |>
     dplyr::mutate(meth="joint", ic="aic")
+  bic_jtbest <- res |>
+    dplyr::filter(rank_bic==1) |>
+    dplyr::select(ttp_meth, ttp_knots, pfs_scales, pfs_knots, os_scales, os_knots) |>
+    dplyr::mutate(meth="joint", ic="bic")
   # Join together
   bests <- bests |>
     tibble::add_row(aic_jtbest) |>

diff --git a/man/compare_psm_likes.Rd b/man/compare_psm_likes.Rd