デルタ法のサンプルサイズ設計には気をつけて
2024-10-19
power.prop.test(p1 = 0.30, # C群のCTR(過去のデータから30%とわかっている)
p2 = 0.31, # T群のCTR(1%ptの効果量を仮定)
sig.level = 0.05, # 有意水準α(真に差がない時に誤って帰無仮説を棄却してしまう割合)
power = 0.8) # 検出力β(真に差がある時に正しく差を検出できる割合)
Two-sample comparison of proportions power calculation
n = 33274.15
p1 = 0.3
p2 = 0.31
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in *each* group
control <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks))
treat <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks))
aa <- bind_rows(control,treat) %>%
mutate(group = c("C_1", "C_2")) %>%
select(group, imp, click) %>%
mutate(CTR = click/imp)
group | imp | click | CTR |
---|---|---|---|
C_1 | 34606 | 10370 | 0.30 |
C_2 | 35097 | 9952 | 0.28 |
2-sample test for equality of proportions with continuity correction
data: aa$click out of aa$imp
X-squared = 21.795, df = 1, p-value = 3.034e-06
alternative hypothesis: two.sided
95 percent confidence interval:
0.009325615 0.022878424
sample estimates:
prop 1 prop 2
0.299659 0.283557
simulate_test <- function(i) {
control <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks))
treat <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks))
tmp <- bind_rows(control, treat)
tmp <- prop.test(n = tmp$imp, x = tmp$click)
tmp$p.value
}
p_values <- future_map_dbl(1:1000,
simulate_test,
.options = furrr_options(seed = 123)
,.progress = TRUE)
手法が必要とする仮定を満たさない状況下では誤った結論を導く可能性がある
var_delta <- function(x, y){
mean_x <- mean(x)
mean_y <- mean(y)
var_x <- var(x)
var_y <- var(y)
cov_xy <- cov(x, y)
result <- (var_x / mean_x**2 + var_y / mean_y**2 - 2 * cov_xy /
(mean_x * mean_y)) * (mean_x * mean_x) / (mean_y * mean_y * length(x))
return(result)
}
delta_test <- function(mean_x, mean_y, var_x, var_y){
diff = mean_y - mean_x
var = var_x + var_y
z = diff / sqrt(var)
p_val <- 2 * pnorm(abs(z), lower.tail = FALSE)
result <- data.frame(difference = diff, p_value = p_val)
return(result)
}
simulate_test <- function(i) {
control <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
treat <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
mean_c <- sum(control$click)/sum(control$imp)
mean_t <- sum(treat$click)/sum(treat$imp)
var_c <- var_delta(control$click, control$imp)
var_t <- var_delta(treat$click, treat$imp)
tmp <- delta_test(mean_c, mean_t, var_c, var_t)
tmp$p_value
}
p_values_aa <- future_map_dbl(1:1000,
simulate_test,
.options = furrr_options(seed = 123)
,.progress = TRUE)
simulate_test <- function(i) {
control <- get_control_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
treat <- get_treat_data(95) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
mean_c <- sum(control$click)/sum(control$imp)
mean_t <- sum(treat$click)/sum(treat$imp)
var_c <- var_delta(control$click, control$imp)
var_t <- var_delta(treat$click, treat$imp)
tmp <- delta_test(mean_c, mean_t, var_c, var_t)
tmp$p_value
}
p_values_ab <- future_map_dbl(1:1000,
simulate_test,
.options = furrr_options(seed = 123)
,.progress = TRUE)
\[ \begin{aligned} k &= 2h・(z_{1-\frac{\alpha}{2}} + z_{1-\beta}) / \delta^{2}, \\ ただし,h &= \frac{1}{\mu^2_{N}}( \sigma^{2}_{S} - 2\frac{\mu_{S}}{\mu_{N}}\sigma_{SN} + \frac{\mu^{2}_{S}}{\mu^{2}_{N}}\sigma^{2}_{N} ) \end{aligned} \]
simulate_test <- function(i) {
control <- get_control_data(3427) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
treat <- get_treat_data(3427) %>%
summarise(imp = sum(impressions),
click = sum(clicks),
.by = user_id)
mean_c <- sum(control$click)/sum(control$imp)
mean_t <- sum(treat$click)/sum(treat$imp)
var_c <- var_delta(control$click, control$imp)
var_t <- var_delta(treat$click, treat$imp)
tmp <- delta_test(mean_c, mean_t, var_c, var_t)
tmp$p_value
}
p_values_ok <- future_map_dbl(1:200,
simulate_test,
.options = furrr_options(seed = 123)
,.progress = TRUE)
独立な試行ではあるが同一でない分布を仮定していた