Claude
Skills
Sign in
Back

diagnostic-accuracy

Included with Lifetime
$97 forever

Diagnostic accuracy analysis in R, including sensitivity, specificity, ROC curves, likelihood ratios, and decision curves.

General

What this skill does


# Diagnostic Accuracy Analysis in R

## Overview

Comprehensive diagnostic test accuracy analysis covering ROC curve analysis, optimal cutpoint determination, sensitivity and specificity estimation, likelihood ratios, decision curve analysis, inter-rater reliability measures, and diagnostic meta-analysis.

## Basic Diagnostic Measures

### 2x2 Table Analysis

```r
library(epiR)

# Create 2x2 table
# Format: [TP, FN; FP, TN]
diag_table <- matrix(c(85, 15,   # Disease+ (TP, FN)
                       20, 180), # Disease- (FP, TN)
                     nrow = 2, byrow = TRUE,
                     dimnames = list(
                       Test = c("Positive", "Negative"),
                       Disease = c("Present", "Absent")
                     ))

# Calculate diagnostic measures
results <- epi.tests(as.table(diag_table), method = "exact")
print(results)

# Extract specific measures
results$detail  # All measures with CIs
# Includes: Se, Sp, PPV, NPV, LR+, LR-, DOR, accuracy, prevalence
```

### Manual Calculations

```r
# From confusion matrix elements
TP <- 85; FN <- 15; FP <- 20; TN <- 180

# Sensitivity (True Positive Rate)
sensitivity <- TP / (TP + FN)

# Specificity (True Negative Rate)
specificity <- TN / (TN + FP)

# Positive Predictive Value
ppv <- TP / (TP + FP)

# Negative Predictive Value
npv <- TN / (TN + FN)

# Positive Likelihood Ratio
lr_pos <- sensitivity / (1 - specificity)

# Negative Likelihood Ratio
lr_neg <- (1 - sensitivity) / specificity

# Diagnostic Odds Ratio
dor <- (TP * TN) / (FP * FN)

# Youden's Index (J)
youden <- sensitivity + specificity - 1

# Accuracy
accuracy <- (TP + TN) / (TP + TN + FP + FN)

# Wilson confidence intervals
library(binom)
se_ci <- binom.confint(TP, TP + FN, method = "wilson")
sp_ci <- binom.confint(TN, TN + FP, method = "wilson")
```

## ROC Curve Analysis

### Basic ROC Curve

```r
library(pROC)

# Create ROC object
roc_obj <- roc(
  response = df$disease,     # Binary outcome (0/1 or factor)
  predictor = df$biomarker,  # Continuous test value
  levels = c(0, 1),          # Control, case
  direction = "<"            # Lower values = control
)

# Summary
print(roc_obj)

# AUC with confidence interval
auc(roc_obj)
ci.auc(roc_obj, method = "delong")       # DeLong method
ci.auc(roc_obj, method = "bootstrap", boot.n = 2000)  # Bootstrap
```

### ROC Curve Plotting

```r
library(pROC)

# Basic plot
plot(roc_obj, print.auc = TRUE, print.thres = TRUE)

# ggplot2-based ROC curve
ggroc(roc_obj) +
  geom_abline(intercept = 1, slope = 1, linetype = "dashed", color = "gray") +
  annotate("text", x = 0.25, y = 0.25,
           label = paste0("AUC = ", round(auc(roc_obj), 3))) +
  theme_bw() +
  labs(x = "Specificity", y = "Sensitivity",
       title = "ROC Curve for Biomarker X")

# Multiple ROC curves
roc1 <- roc(df$disease, df$biomarker1)
roc2 <- roc(df$disease, df$biomarker2)
roc3 <- roc(df$disease, df$biomarker3)

ggroc(list(Biomarker1 = roc1, Biomarker2 = roc2, Biomarker3 = roc3)) +
  geom_abline(intercept = 1, slope = 1, linetype = "dashed") +
  theme_bw() +
  scale_color_brewer(palette = "Set1")
```

### Comparing ROC Curves

```r
library(pROC)

# DeLong test for comparing AUCs
roc_test <- roc.test(roc1, roc2, method = "delong")
print(roc_test)

# Bootstrap comparison
roc_test_boot <- roc.test(roc1, roc2, method = "bootstrap", boot.n = 2000)

# Venkatraman test (for entire curves, not just AUC)
roc_test_venk <- roc.test(roc1, roc2, method = "venkatraman")

# Compare multiple markers
comparison <- data.frame(
  Marker = c("Biomarker1", "Biomarker2", "Biomarker3"),
  AUC = c(auc(roc1), auc(roc2), auc(roc3)),
  CI_Lower = c(ci.auc(roc1)[1], ci.auc(roc2)[1], ci.auc(roc3)[1]),
  CI_Upper = c(ci.auc(roc1)[3], ci.auc(roc2)[3], ci.auc(roc3)[3])
)
```

### Partial AUC

```r
library(pROC)

# Partial AUC for high specificity region (Sp > 0.9)
pauc_spec <- auc(roc_obj, partial.auc = c(1, 0.9),
                 partial.auc.focus = "specificity")

# Partial AUC for high sensitivity region (Se > 0.9)
pauc_sens <- auc(roc_obj, partial.auc = c(0.9, 1),
                 partial.auc.focus = "sensitivity")

# Standardized partial AUC (McClish correction)
pauc_std <- auc(roc_obj, partial.auc = c(1, 0.9),
                partial.auc.focus = "specificity",
                partial.auc.correct = TRUE)

# CI for partial AUC
ci.auc(roc_obj, partial.auc = c(1, 0.9),
       partial.auc.focus = "specificity")
```

## Optimal Cutpoint Selection

### Using cutpointr Package

```r
library(cutpointr)

# Youden's Index (maximize Se + Sp - 1)
cp_youden <- cutpointr(
  data = df,
  x = biomarker,
  class = disease,
  method = maximize_metric,
  metric = youden
)
summary(cp_youden)
plot(cp_youden)

# Maximize sensitivity with specificity >= 0.9
cp_constrained <- cutpointr(
  df, biomarker, disease,
  method = maximize_metric,
  metric = sensitivity,
  tol_metric = specificity,
  tol_threshold = 0.9
)

# Cost-based optimization
cp_cost <- cutpointr(
  df, biomarker, disease,
  method = minimize_metric,
  metric = misclassification_cost,
  cost_fp = 1,    # Cost of false positive
  cost_fn = 5     # Cost of false negative (5x higher)
)

# Multiple optimal cutpoints
cp_multi <- multi_cutpointr(
  df, biomarker, disease,
  method = maximize_metric,
  metric = youden,
  boot_cut = 1000
)
```

### Using OptimalCutpoints Package

```r
library(OptimalCutpoints)

# Multiple methods simultaneously
opt_cut <- optimal.cutpoints(
  X = "biomarker",
  status = "disease",
  methods = c("Youden", "MaxSpSe", "MaxProdSpSe", "ROC01",
              "MinValueSp", "MinValueSe", "MaxEfficiency"),
  data = df,
  tag.healthy = 0
)

summary(opt_cut)

# Cost-benefit method
opt_cost <- optimal.cutpoints(
  X = "biomarker",
  status = "disease",
  methods = "CB",
  data = df,
  tag.healthy = 0,
  costs.ratio = 5,        # FN cost / FP cost
  prevalence = 0.10       # Disease prevalence
)
```

### Manual Cutpoint Selection

```r
library(pROC)

# Youden's optimal threshold
coords_youden <- coords(roc_obj, "best", best.method = "youden")
print(coords_youden)

# Closest to (0,1) corner
coords_closest <- coords(roc_obj, "best", best.method = "closest.topleft")

# At specific sensitivity/specificity
coords_se90 <- coords(roc_obj, x = 0.90, input = "sensitivity",
                      ret = c("threshold", "sensitivity", "specificity"))

# All coordinates
all_coords <- coords(roc_obj, x = "all",
                     ret = c("threshold", "sensitivity", "specificity",
                             "ppv", "npv", "accuracy"))
```

## Decision Curve Analysis

### Using dcurves Package

```r
library(dcurves)

# Fit prediction models
model1 <- glm(cancer ~ age + psa, data = df, family = binomial)
model2 <- glm(cancer ~ age + psa + dre, data = df, family = binomial)

df$pred1 <- predict(model1, type = "response")
df$pred2 <- predict(model2, type = "response")

# Decision curve analysis
dca_result <- dca(
  cancer ~ pred1 + pred2,
  data = df,
  thresholds = seq(0, 0.5, by = 0.01),
  label = list(pred1 = "PSA Model", pred2 = "PSA + DRE Model")
)

# Plot decision curves
plot(dca_result)

# Customized plot
plot(dca_result, smooth = TRUE) +
  ggplot2::coord_cartesian(ylim = c(-0.05, 0.2)) +
  ggplot2::labs(x = "Threshold Probability",
                y = "Net Benefit",
                title = "Decision Curve Analysis")
```

### Net Benefit Calculation

```r
library(dcurves)

# Extract net benefit at specific threshold
nb_data <- as_tibble(dca_result)

# Net interventions avoided
net_intervention_avoided(dca_result)

# Standardized net benefit
standardized_net_benefit <- function(nb, prevalence, threshold) {
  max_nb <- prevalence - (1 - prevalence) * threshold / (1 - threshold)
  nb / max_nb
}
```

### Clinical Utility Visualization

```r
library(dcurves)

# Clinical impact plot
dca_result |>
  plot(type = "clinical_impact")

# Net benefit with confidence intervals (bootstrap)
dca_boot <- dca(
  cancer ~ pred1,
  data = df,
  thresholds = seq(0, 0.5, by = 0.05)
)
```

## Inter-Rater Relia

Related in General