if(!require("tidyverse")) install.packages("tidyverse")
if(!require("shiny")) install.packages("shiny")
library(tidyverse)
library(shiny)
13 Change Point finding
One method to find the threshold is to perform piecewise regression repeatedly at each threshold point and find the optimal model. Here’s a brief explanation of piecewise regression:
threshold points (piecewise regression) | codes |
---|---|
total | Resp = α + β1 · Dose + β2 ·( Dose – Ɵ) + + Ɛ0 |
If Dose < Ɵ | Resp= α + β1 · Dose + Ɛ0 |
If Dose > Ɵ | Resp= α - β2 ·Ɵ +( β1 + β2 )· Dose + Ɛ0 |
model selection | minimal AIC value |
13.0.1 Data Generation
We generated some sample data. Let’s consider a scenario where exposure to a certain substance (Dose) affects health (Resp), and assume there’s a threshold level.
set.seed(0)
<- seq(0,10, 0.1)
dose length(dose)
[1] 101
<-c(rnorm(50, 0, 0.001), rnorm(30, 0, 0.01), rnorm(10, 0.1, 0.05),rnorm(11, -0.1, 0.05))
pb<-1/(1+exp(-(dose-5)))+rnorm(length(dose), 0, 0.1)+pb
resp
plot(dose, resp, xlab='Dose', ylab='Response', cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
<-data.frame(dose, resp, pb) cohort
13.1 Trying Out Hypothetical Threshold Values
It looks like there might be a threshold between 1 and 5. Let’s calculate the expected matrix (outdata) for some hypothetical values. We’ll get the intercept, beta for before threshold, and its p-value, beta for post threshold and its p-value, and the AIC value for each threshold. First, let’s try thresholds at 1 and 5:
<- ifelse(dose -1 >0, dose -1, 0)
cpdose <- glm(resp ~ dose + cpdose)
cpm summary(cpm)$aic
[1] -92.20184
<- ifelse(dose -5 >0, dose -5, 0)
cpdose <- glm(resp ~ dose + cpdose)
cpm summary(cpm)$aic
[1] -86.9744
Which assumption improves model fit? It turns out the threshold at 1 is better. We should also compare it with thresholds like 2 and 2.5. Let’s perform repeated analysis for this.
We created a function for the above model:
<- function(thres){
thr_fun <- ifelse(dose - thres >0, dose - thres, 0)
cpdose <- glm(resp ~ dose + cpdose)
cpm <- summary(cpm)$aic
aic data.frame(
'threshold' = thres,
'aic' = aic)
}
Let’s set the range to test:
# 이게 어떤 의미 일까요?
which(dose == 1):which(dose == 5)] dose[
[1] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8
[20] 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7
[39] 4.8 4.9 5.0
Now, we’ll run the analysis repeatedly:
<- list()
simul_list <- lapply(dose[which(dose ==1):which(dose ==5)], thr_fun
simul_list )
Next, let’s combine the results into a data frame:
<- do.call(rbind, simul_list) simul_dat
Let’s plot the results:
library(ggplot2)
<- simul_dat$threshold[which.min(simul_dat$aic)]
opt.thres
%>%
simul_dat ggplot(aes(x = threshold, y = aic)) +
geom_line() +
geom_vline(xintercept = opt.thres) +
geom_text(x = opt.thres + 0.8, y = -90, color = 'red',
label = paste0(round(opt.thres, 3), '점에서 최소 AIC를 보입니다.' )) +
theme_minimal()
It shows that the optimal threshold is at 2.1.
= 2.1
thres <- ifelse(dose - thres >0, dose - thres, 0)
f_cpdose <- glm(resp ~ dose + f_cpdose) f_cpm
<- predict(f_cpm)
prepwlm <- function(x) sprintf("%.2f", x)
scaleFUN %>%
cohort ggplot(aes(x= dose, y = resp)) +
geom_point() +
theme_minimal() +
scale_y_continuous(labels = scaleFUN) +
geom_line(aes(y = prepwlm), color ='red')
13.2 Quiz
Assume there is a change point between doses of 6 and 10. Perform a repeated analysis to find the change point that results in the lowest AIC value. Please submit your code.
13.3 Quiz for Shiny app for threshold
for global
# Load required libraries
library(shiny)
library(ggplot2)
# Generate sample data
set.seed(0)
<- seq(0, 10, 0.1)
dose <- c(rnorm(50, 0, 0.001), rnorm(30, 0, 0.01), rnorm(10, 0.1, 0.05), rnorm(11, -0.1, 0.05))
pb <- 1 / (1 + exp(-(dose - 5))) + rnorm(length(dose), 0, 0.1) + pb
resp <- data.frame(dose, resp, pb) cohort
for UI
# Define UI
<- fluidPage(
ui titlePanel("Find Optimal Threshold using Piecewise Regression"),
sidebarLayout(
sidebarPanel(
sliderInput("range", "Select Dose Range:",
min = 0, max = 10, value = c(1, 5), step = 0.1),
actionButton("run", "Run Analysis")
),mainPanel(
plotOutput("initialPlot"),
plotOutput("aicPlot"),
plotOutput("thresholdPlot")
)
) )
for server
# Define server logic
<- function(input, output) {
server <- function(thres) {
thr_fun <- ifelse(dose - thres > 0, dose - thres, 0)
cpdose <- glm(resp ~ dose + cpdose)
cpm <- summary(cpm)$aic
aic data.frame('threshold' = thres, 'aic' = aic)
}
<- eventReactive(input$run, {
analyze <- seq(input$range[1], input$range[2], by = 0.1)
range <- lapply(range, thr_fun)
simul_list <- do.call(rbind, simul_list)
simul_dat
simul_dat
})
$initialPlot <- renderPlot({
outputggplot(cohort, aes(x = dose, y = resp)) +
geom_point() +
labs(title = "Scatter Plot of Dose vs Response", x = "Dose", y = "Response") +
theme_minimal()
})
$aicPlot <- renderPlot({
output<- analyze()
simul_dat <- simul_dat$threshold[which.min(simul_dat$aic)]
opt.thres
ggplot(simul_dat, aes(x = threshold, y = aic)) +
geom_line() +
geom_vline(xintercept = opt.thres, color = 'blue') +
geom_text(x = opt.thres + 0.2, y = min(simul_dat$aic), color = 'red',
label = paste0("Optimal Threshold: ", round(opt.thres, 2))) +
labs(title = "AIC Values for Different Thresholds", x = "Threshold", y = "AIC") +
theme_minimal()
})
$thresholdPlot <- renderPlot({
output# Quiz
# fill the code
}) }