forcatsとpurrrを組み合わせることで、複雑なカテゴリカルデータの分析を効率的かつエレガントに実現できます。
ここでは実際のビジネスシナリオを想定した高度な応用例を紹介します。
# Simulated customer-satisfaction surveys, one tibble per survey language.
# Each tibble has a `satisfaction` answer, a `product` name (both written in
# the survey language) and a constant `country` column.
# The rows are drawn at random with replacement; no seed is set in this block,
# so the drawn values are not fixed by the code here.
multilingual_data <- local({
  # Draw `n` values from `choices` with replacement.
  draw <- function(choices, n) {
    sample(choices, size = n, replace = TRUE)
  }

  list(
    japanese = tibble(
      satisfaction = draw(
        c("とても不満", "不満", "普通", "満足", "とても満足"),
        200
      ),
      product = draw(c("製品A", "製品B", "製品C"), 200),
      country = "Japan"
    ),
    english = tibble(
      satisfaction = draw(
        c(
          "Very Unsatisfied", "Unsatisfied", "Neutral",
          "Satisfied", "Very Satisfied"
        ),
        150
      ),
      product = draw(c("Product A", "Product B", "Product C"), 150),
      country = "USA"
    ),
    chinese = tibble(
      satisfaction = draw(
        c("非常不满", "不满", "一般", "满意", "非常满意"),
        180
      ),
      product = draw(c("产品A", "产品B", "产品C"), 180),
      country = "China"
    )
  )
})
# Translation tables for the satisfaction scale, keyed by survey language.
# Each entry is a named character vector whose names are the labels in the
# survey language and whose values are the English labels (old = new).
# Note: forcats::fct_recode() takes `new = old` pairs, i.e. the reverse of
# the orientation stored here.
satisfaction_mapping <- local({
  english_scale <- c(
    "Very Unsatisfied", "Unsatisfied", "Neutral", "Satisfied", "Very Satisfied"
  )

  list(
    japanese = setNames(
      english_scale,
      c("とても不満", "不満", "普通", "満足", "とても満足")
    ),
    chinese = setNames(
      english_scale,
      c("非常不满", "不满", "一般", "满意", "非常满意")
    )
  )
})
# Translation tables for product names, keyed by survey language.
# Names are the product labels in the survey language; values are the
# English product labels (old = new).
product_mapping <- list(
  japanese = structure(
    c("Product A", "Product B", "Product C"),
    names = c("製品A", "製品B", "製品C")
  ),
  chinese = structure(
    c("Product A", "Product B", "Product C"),
    names = c("产品A", "产品B", "产品C")
  )
)
# Translate every survey onto the English labels and stack the results into a
# single tibble with an extra `language` column (the name of the list element
# the rows came from).
#
# For every language other than "english", `satisfaction` and `product` are
# recoded through `satisfaction_mapping` / `product_mapping`. Afterwards
# `satisfaction` is releveled so its levels run from "Very Unsatisfied" to
# "Very Satisfied".
standardized_data <- multilingual_data %>%
  imap_dfr(function(data, language) {
    if (language != "english") {
      satisfaction_lookup <- satisfaction_mapping[[language]]
      product_lookup <- product_mapping[[language]]

      # fct_recode() expects `new = old` pairs, but the mapping tables are
      # stored as `old = new`. Splicing them unchanged makes fct_recode() look
      # for the English labels as *existing* levels, so nothing is translated.
      # Swap names and values before splicing.
      data$satisfaction <- fct_recode(
        data$satisfaction,
        !!!setNames(names(satisfaction_lookup), satisfaction_lookup)
      )
      data$product <- fct_recode(
        data$product,
        !!!setNames(names(product_lookup), product_lookup)
      )
    }

    # Put the scale in ascending order so integer codes follow satisfaction.
    data$satisfaction <- fct_relevel(
      data$satisfaction,
      "Very Unsatisfied", "Unsatisfied", "Neutral", "Satisfied", "Very Satisfied"
    )

    data %>% mutate(language = language)
  })
# Satisfaction summary per country/product pair, highest mean score first.
#
# Result columns: `country`, `product`, `mean_score`, `median_score`, `count`.
# Scores are the integer codes of the `satisfaction` factor, so this assumes
# its levels are ordered from least to most satisfied.
country_analysis <- standardized_data %>%
  group_by(country, product) %>%
  nest() %>%
  # nest() on a grouped data frame keeps the grouping; drop it so the result
  # is a plain tibble and later verbs do not silently run once per row.
  ungroup() %>%
  mutate(
    satisfaction_scores = map(data, ~ as.numeric(.x$satisfaction)),
    mean_score = map_dbl(satisfaction_scores, mean),
    median_score = map_dbl(satisfaction_scores, median),
    count = map_int(data, nrow)
  ) %>%
  # The list-columns were only needed to compute the summaries.
  select(-data, -satisfaction_scores) %>%
  arrange(desc(mean_score))