# Simulate a reproducible customer data set: one row per customer with a
# service tier, a region, a satisfaction score and a monthly usage count.
set.seed(456)
n_customers <- 300L

customer_data <- tibble(
  customer_id = seq_len(n_customers),
  service_type = sample(
    c("ベーシック", "プレミアム", "エンタープライズ"),
    n_customers,
    replace = TRUE,
    prob = c(0.5, 0.3, 0.2)
  ),
  region = sample(c("関東", "関西", "その他"), n_customers, replace = TRUE)
) %>%
  # n() is a dplyr context function: it is only valid inside data-masking
  # verbs such as mutate(), not inside tibble(). The tier-dependent draws
  # therefore live here. case_when() evaluates every right-hand side for all
  # rows and keeps the value from the first matching condition.
  mutate(
    satisfaction = case_when(
      service_type == "ベーシック" ~ rnorm(n(), 6.5, 1.2),
      service_type == "プレミアム" ~ rnorm(n(), 7.5, 1.0),
      service_type == "エンタープライズ" ~ rnorm(n(), 8.2, 0.8)
    ),
    monthly_usage = case_when(
      service_type == "ベーシック" ~ rpois(n(), 15),
      service_type == "プレミアム" ~ rpois(n(), 25),
      service_type == "エンタープライズ" ~ rpois(n(), 40)
    )
  ) %>%
  # Clamp the simulated scores to the range [1, 10].
  mutate(satisfaction = pmax(1, pmin(10, satisfaction)))
# Per-service-type summary: group size plus the mean and standard deviation
# of satisfaction and of monthly usage. The result is returned ungrouped.
descriptive_stats <- summarise(
  group_by(customer_data, service_type),
  count = n(),
  mean_satisfaction = mean(satisfaction),
  sd_satisfaction = sd(satisfaction),
  mean_usage = mean(monthly_usage),
  sd_usage = sd(monthly_usage),
  .groups = "drop"
)

print("サービス別記述統計:")
print(descriptive_stats)
# Two-sample t-test of satisfaction between the ベーシック and プレミアム
# tiers, run with t.test()'s default settings on the two-tier subset.
basic_vs_premium <- filter(
  customer_data,
  service_type %in% c("ベーシック", "プレミアム")
)
basic_premium_test <- t.test(
  satisfaction ~ service_type,
  data = basic_vs_premium
)

print("ベーシック vs プレミアムのt検定:")
print(tidy(basic_premium_test))
# One-way ANOVA of satisfaction across all service types, reported as a tidy
# table.
anova_model <- aov(satisfaction ~ service_type, data = customer_data)
anova_results <- anova_model %>% tidy()

print("サービス別満足度の分散分析:")
print(anova_results)

# Tukey HSD pairwise comparisons computed from the same fitted model.
tukey_results <- TukeyHSD(anova_model)
tukey_tidy <- tukey_results %>% tidy()

print("多重比較結果:")
print(tukey_tidy)
# Region x service-type customer counts in wide form: one row per region,
# one column per service type, with missing combinations filled with 0.
contingency_table <- pivot_wider(
  count(customer_data, region, service_type),
  names_from = service_type,
  values_from = n,
  values_fill = 0
)

print("地域別サービス利用状況:")
print(contingency_table)
# Chi-squared test on the region x service-type cross-tabulation built from
# the two categorical columns of customer_data.
chi_square_test <-
  table(select(customer_data, region, service_type)) %>%
  chisq.test()

print("カイ二乗検定結果:")
print(tidy(chi_square_test))
# Cohen's d for satisfaction between the ベーシック and プレミアム tiers,
# using the pooled standard deviation of the two groups.
cohens_d_calculation <- customer_data %>%
  filter(service_type %in% c("ベーシック", "プレミアム")) %>%
  group_by(service_type) %>%
  summarise(
    mean_sat = mean(satisfaction),
    sd_sat = sd(satisfaction),
    n = n(),
    .groups = "drop"
  )

# Pick each tier's summary row by its label rather than by position, so the
# sign of d does not depend on how the group keys happen to be sorted.
basic_stats <- cohens_d_calculation[
  cohens_d_calculation$service_type == "ベーシック",
]
premium_stats <- cohens_d_calculation[
  cohens_d_calculation$service_type == "プレミアム",
]
stopifnot(nrow(basic_stats) == 1L, nrow(premium_stats) == 1L)

# Direction: ベーシック mean minus プレミアム mean, matching the printed label.
mean_diff <- basic_stats$mean_sat - premium_stats$mean_sat
pooled_sd <- sqrt(
  ((basic_stats$n - 1) * basic_stats$sd_sat^2 +
    (premium_stats$n - 1) * premium_stats$sd_sat^2) /
    (basic_stats$n + premium_stats$n - 2)
)
cohens_d <- mean_diff / pooled_sd

print(paste("Cohen's d (ベーシック vs プレミアム):", round(cohens_d, 3)))