# RESULTS REPLICATION # ==================== # This script fits generalized linear mixed models (GLMMs) to estimate # proportions of correct case management and other clinical quality indicators, # accounting for clustering at the clinic and standardized patient (SP) levels. library(dplyr) library(tidyr) library(glmmTMB) library(emmeans) ############################################################### ######################## MODEL FORM ########################### ############################################################### # Define the site factor levels used for stratified analysis site_levels <- c("A", "B", "C") #' Fit a binomial GLMM and return summary statistics #' #' @param dat Data frame containing the outcome and grouping variables #' @param outcome Character string specifying the outcome variable (or cbind formula for proportions) #' @param site_name Optional site identifier for subsetting; NULL fits overall model #' @param db_cols Character vector of column names required for complete-case analysis #' #' @return A list containing group label, outcome name, sample size, number of clinics/SPs, #' degrees of freedom, estimated probability (p_hat), and mean linear predictor (eta_bar) fit_and_summarise <- function(dat, outcome, site_name = NULL, db_cols) { # Subset to specific site if requested; otherwise use full dataset d <- if (is.null(site_name)) dat else subset(dat, site == site_name) # Restrict to complete cases for the columns needed in the model d <- d[complete.cases(d[, db_cols]), ] # Drop unused factor levels from clustering variables after subsetting d$clinic_names <- droplevels(factor(d$clinic_names)) d$sp_names <- droplevels(factor(d$sp_names)) # Count unique clusters n_clinics <- nlevels(d$clinic_names) n_sps <- nlevels(d$sp_names) # Require at least 2 clinics to estimate between-clinic variance if (n_clinics < 2) stop("Need at least 2 clinics after cleaning for: ", ifelse(is.null(site_name), "Overall", site_name)) # Degrees of freedom based on number of clinics (conservative choice for CIs) df_use <- n_clinics - 1 tcrit <- qt(0.975, df = df_use) # Two-sided 95% critical value # Build formula: intercept-only fixed effects with crossed random intercepts form <- as.formula(paste0(outcome, " ~ 1 + (1 | clinic_names) + (1 | sp_names)")) # Fit the GLMM with REML estimation mod <- glmmTMB( form, data = d, REML = TRUE, family = binomial(link = "logit") ) # ----------------------------- # Compute marginal predictions # ----------------------------- # Predicted linear predictor (eta) including random effects eta_hat <- predict(mod, type = "link", re.form = NULL) # Convert to probability scale p_hat_i <- plogis(eta_hat) # Average predicted probability across observations p_hat <- mean(p_hat_i) # Extract variance-covariance matrix for fixed effects V <- vcov(mod)$cond X <- model.matrix(mod) # ----------------------------- # Delta-method SE for mean probability # ----------------------------- # Gradient of mean(p) w.r.t. fixed effects uses chain rule through logistic w <- p_hat_i * (1 - p_hat_i) # Derivative of logistic at each eta grad <- colMeans(X * w) # Average gradient se_p <- sqrt(drop(t(grad) %*% V %*% grad)) # Wald CI on probability scale (truncated to [0,1]) ci_prob <- p_hat + c(-1, 1) * tcrit * se_p ci_prob <- pmin(pmax(ci_prob, 0), 1) # ----------------------------- # Alternative: CI via mean eta then back-transform # ----------------------------- eta_bar <- mean(eta_hat) se_eta_bar <- sqrt(drop(t(colMeans(X)) %*% V %*% colMeans(X))) ci_eta <- eta_bar + c(-1, 1) * tcrit * se_eta_bar ci_eta_prob <- plogis(ci_eta) # Return summary statistics list( group = if (is.null(site_name)) "Overall" else site_name, outcome = outcome, n = nrow(d), n_clinics = n_clinics, n_sps = n_sps, df = df_use, p_hat = p_hat, eta_bar = eta_bar ) } ############################################################### ################# CORRECT CASE MANAGEMENT ##################### ############################################################### # Fit overall model (all sites combined) overall_res <- fit_and_summarise( wp3data_rep, outcome = "correct_case_management", db_cols = c("correct_case_management", "sp_names", "clinic_names") ) # Fit site-stratified models site_res <- setNames( lapply(site_levels, \(s) fit_and_summarise( wp3data_rep, outcome = "correct_case_management", site_name = s, db_cols = c("correct_case_management", "sp_names", "clinic_names") )), site_levels ) # Combine results and print results_all <- c(list(Overall = overall_res), site_res) print(results_all) ############################################################### ############### CARE THAT AVOIDS NEGLECT ###################### ############################################################### # Overall model for "avoiding neglect" outcome overall_res <- fit_and_summarise( wp3data_rep, outcome = "avoiding_neglect_management", db_cols = c("avoiding_neglect_management", "sp_names", "clinic_names") ) # Site-stratified models # NOTE: db_cols uses correct_case_management here—verify this matches intended filtering site_res <- setNames( lapply(site_levels, \(s) fit_and_summarise( wp3data_rep, outcome = "avoiding_neglect_management", site_name = s, db_cols = c("correct_case_management", "sp_names", "clinic_names") )), site_levels ) results_all <- c(list(Overall = overall_res), site_res) print(results_all) ############################################################### ################## HISTORY PROPORTIONS ######################## ############################################################### # Model history-taking as a binomial proportion: successes / total possible items # cbind(successes, failures) format for binomial family overall_res <- fit_and_summarise( wp3data_rep, outcome = "cbind(history_score, (max_history_score - history_score))", db_cols = c("history_score", "max_history_score", "sp_names", "clinic_names") ) site_res <- setNames( lapply(site_levels, \(s) fit_and_summarise( wp3data_rep, outcome = "cbind(history_score, (max_history_score - history_score))", site_name = s, db_cols = c("history_score", "max_history_score", "sp_names", "clinic_names") )), site_levels ) results_all <- c(list(Overall = overall_res), site_res) print(results_all) ############################################################### ################ EXAMINATION PROPORTIONS ###################### ############################################################### # Model physical examination as a binomial proportion overall_res <- fit_and_summarise( wp3data_rep, outcome = "cbind(examination_score, (max_examination_score - examination_score))", db_cols = c("examination_score", "max_examination_score", "sp_names", "clinic_names") ) site_res <- setNames( lapply(site_levels, \(s) fit_and_summarise( wp3data_rep, outcome = "cbind(examination_score, (max_examination_score - examination_score))", site_name = s, db_cols = c("examination_score", "max_examination_score", "sp_names", "clinic_names") )), site_levels ) results_all <- c(list(Overall = overall_res), site_res) print(results_all) ############################################################### ## ADJUSTED MODEL: Correct Case Management with Covariates ## ############################################################### # This model adjusts for potential confounders: # - site: geographic location # - case: clinical case type presented by SP # - private: clinic ownership (public vs. private) # - patient_volume: average daily patient volume at clinic # - clinician_cadre: professional qualification level mod_adjusted <- glmmTMB( correct_case_management ~ -1 + # Suppress global intercept factor(site) + # Site fixed effects factor(case) + # Case-type fixed effects factor(private) + # Ownership fixed effect patient_volume + # Continuous covariate factor(clinician_cadre) + # Clinician qualification (1 | clinic_names) + # Random intercept for clinics (1 | sp_names), # Random intercept for SPs data = wp3data_rep, REML = TRUE, family = binomial(link = "logit") ) summary(mod_adjusted) # ----------------------------- # Estimated Marginal Means: Correct Case Management by Case Type # ----------------------------- # Quick look at response-scale EMMs (default z-based CIs) emmeans(mod_adjusted, ~ case, type = "response") # Extract EMMs on link (logit) scale for custom t-based CIs emm_link <- emmeans(mod_adjusted, ~ case) s <- as.data.frame(summary(emm_link)) # Use clinic-based df for conservative inference df_use <- length(unique(wp3data_rep$clinic_names)) - 1 tcrit <- qt(0.975, df = df_use) # Compute t-based confidence limits on logit scale, then back-transform s$LCL_link <- s$emmean - tcrit * s$SE s$UCL_link <- s$emmean + tcrit * s$SE s$prob <- plogis(s$emmean) s$LCL_prob <- plogis(s$LCL_link) s$UCL_prob <- plogis(s$UCL_link) # Display case-specific probabilities with CIs s[, c("case", "prob", "SE", "LCL_prob", "UCL_prob")] # ----------------------------- # Odds Ratio: Private vs. Public Clinics # ----------------------------- # Pairwise contrast on logit scale gives log-odds ratio or_private <- contrast( emmeans(mod_adjusted, ~ private), method = "revpairwise" ) # Default output with z-based inference summary(or_private, infer = c(TRUE, TRUE), type = "response") # Recompute with t-based CIs s <- as.data.frame(summary(or_private)) df_use <- length(unique(wp3data$clinic_names)) - 1 tcrit <- qt(0.975, df = df_use) # CI on log(OR) scale s$LCL_logOR <- s$estimate - tcrit * s$SE s$UCL_logOR <- s$estimate + tcrit * s$SE # Back-transform to odds ratio scale s$OR <- exp(s$estimate) s$LCL <- exp(s$LCL_logOR) s$UCL <- exp(s$UCL_logOR) s[, c("contrast", "OR", "SE", "LCL", "UCL")] # ----------------------------- # Odds Ratio: Effect of Patient Volume (per unit increase) # ----------------------------- # emtrends extracts the slope of a continuous predictor on the link scale vol_trend <- emtrends( mod_adjusted, ~ 1, var = "patient_volume" ) vol_out <- as.data.frame(summary(vol_trend)) # t-based CI on log(OR) scale vol_out$LCL_logOR <- vol_out$patient_volume.trend - tcrit * vol_out$SE vol_out$UCL_logOR <- vol_out$patient_volume.trend + tcrit * vol_out$SE # Back-transform: exp(slope) = OR per one-unit increase in patient_volume vol_out$OR <- exp(vol_out$patient_volume.trend) vol_out$LCL <- exp(vol_out$LCL_logOR) vol_out$UCL <- exp(vol_out$UCL_logOR) vol_out[, c("OR", "SE", "LCL", "UCL")]