デルタ法のサンプルサイズ設計には気をつけて
2024-10-19



power.prop.test(p1 = 0.30, # C群のCTR(過去のデータから30%とわかっている)
                p2 = 0.31, # T群のCTR(1%ptの効果量を仮定)
                sig.level = 0.05, # 有意水準α(真に差がない時に誤って帰無仮説を棄却してしまう割合)
                power = 0.8) # 検出力β(真に差がある時に正しく差を検出できる割合)
     Two-sample comparison of proportions power calculation 
              n = 33274.15
             p1 = 0.3
             p2 = 0.31
      sig.level = 0.05
          power = 0.8
    alternative = two.sided
NOTE: n is number in *each* groupA/Aテストのイメージ
control <- get_control_data(95) %>% 
  summarise(imp = sum(impressions),
            click = sum(clicks))
treat <- get_control_data(95) %>% 
  summarise(imp = sum(impressions),
            click = sum(clicks))
aa <- bind_rows(control,treat) %>% 
  mutate(group = c("C_1", "C_2")) %>% 
  select(group, imp, click) %>% 
  mutate(CTR = click/imp)| group | imp | click | CTR | 
|---|---|---|---|
| C_1 | 34606 | 10370 | 0.30 | 
| C_2 | 35097 | 9952 | 0.28 | 
    2-sample test for equality of proportions with continuity correction
data:  aa$click out of aa$imp
X-squared = 21.795, df = 1, p-value = 3.034e-06
alternative hypothesis: two.sided
95 percent confidence interval:
 0.009325615 0.022878424
sample estimates:
  prop 1   prop 2 
0.299659 0.283557 simulate_test <- function(i) {
  control <- get_control_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks))
  treat <- get_control_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks))
  tmp <- bind_rows(control, treat) 
  tmp <- prop.test(n = tmp$imp, x = tmp$click)
  tmp$p.value
}
p_values <- future_map_dbl(1:1000, 
                           simulate_test, 
                           .options = furrr_options(seed = 123) 
                           ,.progress = TRUE)
手法が必要とする仮定を満たさない状況下では誤った結論を導く可能性がある
var_delta <- function(x, y){
  mean_x <- mean(x)
  mean_y <- mean(y)
  var_x <- var(x)
  var_y <- var(y)
  cov_xy <- cov(x, y)
  result <- (var_x / mean_x**2 + var_y / mean_y**2 - 2 * cov_xy /
            (mean_x * mean_y)) * (mean_x * mean_x) / (mean_y * mean_y * length(x))
  return(result)
}
delta_test <- function(mean_x, mean_y, var_x, var_y){
  diff = mean_y - mean_x
  var = var_x + var_y
  z = diff / sqrt(var)
  p_val <- 2 * pnorm(abs(z), lower.tail = FALSE)
  result <- data.frame(difference = diff, p_value = p_val)
  return(result)
}simulate_test <- function(i) {
  control <- get_control_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  treat <- get_control_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  mean_c <- sum(control$click)/sum(control$imp)
  mean_t <- sum(treat$click)/sum(treat$imp)
  var_c <- var_delta(control$click, control$imp)
  var_t <- var_delta(treat$click, treat$imp)
  tmp <- delta_test(mean_c, mean_t, var_c, var_t)
  tmp$p_value
}
p_values_aa <- future_map_dbl(1:1000, 
                           simulate_test, 
                           .options = furrr_options(seed = 123) 
                           ,.progress = TRUE)
simulate_test <- function(i) {
  control <- get_control_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  treat <- get_treat_data(95) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  mean_c <- sum(control$click)/sum(control$imp)
  mean_t <- sum(treat$click)/sum(treat$imp)
  var_c <- var_delta(control$click, control$imp)
  var_t <- var_delta(treat$click, treat$imp)
  tmp <- delta_test(mean_c, mean_t, var_c, var_t)
  tmp$p_value
}
p_values_ab <- future_map_dbl(1:1000, 
                           simulate_test, 
                           .options = furrr_options(seed = 123) 
                           ,.progress = TRUE)
\[ \begin{aligned} k &= 2h・(z_{1-\frac{\alpha}{2}} + z_{1-\beta}) / \delta^{2}, \\ ただし,h &= \frac{1}{\mu^2_{N}}( \sigma^{2}_{S} - 2\frac{\mu_{S}}{\mu_{N}}\sigma_{SN} + \frac{\mu^{2}_{S}}{\mu^{2}_{N}}\sigma^{2}_{N} ) \end{aligned} \]
simulate_test <- function(i) {
  control <- get_control_data(3427) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  treat <- get_treat_data(3427) %>% 
    summarise(imp = sum(impressions),
              click = sum(clicks),
              .by = user_id)
  mean_c <- sum(control$click)/sum(control$imp)
  mean_t <- sum(treat$click)/sum(treat$imp)
  var_c <- var_delta(control$click, control$imp)
  var_t <- var_delta(treat$click, treat$imp)
  tmp <- delta_test(mean_c, mean_t, var_c, var_t)
  tmp$p_value
}
p_values_ok <- future_map_dbl(1:200, 
                           simulate_test, 
                           .options = furrr_options(seed = 123) 
                           ,.progress = TRUE)
独立な試行ではあるが同一でない分布を仮定していた