anomaly_detection <- sales_ts %>%
plot_anomaly_diagnostics(.date_var = date, .value = sales,
.alpha = 0.05, .max_anomalies = 0.1)
anomalies <- sales_ts %>%
tk_anomaly_diagnostics(.date_var = date, .value = sales) %>%
filter(anomaly == "Yes")
print(paste("検出された異常値数:", nrow(anomalies)))
print("異常値の例:")
print(head(anomalies, 5))
enhanced_sales <- sales_ts %>%
mutate(
ad_spend = case_when(
wday(date) %in% c(1, 7) ~ rnorm(n(), 50000, 10000),
TRUE ~ rnorm(n(), 30000, 8000)
),
temperature = 20 + 15 * sin(2 * pi * yday(date) / 365) + rnorm(n(), 0, 3),
competitor_campaign = rbinom(n(), 1, 0.1),
is_holiday = case_when(
month(date) == 1 && day(date) %in% c(1:3) ~ 1,
month(date) == 5 && day(date) %in% c(3:5) ~ 1,
month(date) == 12 && day(date) %in% c(23:25) ~ 1,
TRUE ~ 0
)
) %>%
mutate(ad_spend = pmax(ad_spend, 10000))
enhanced_splits <- enhanced_sales %>%
time_series_split(assess = "3 months", cumulative = TRUE)
enhanced_train <- training(enhanced_splits)
enhanced_test <- testing(enhanced_splits)
model_enhanced_lm <- linear_reg() %>%
set_engine("lm") %>%
fit(sales ~ as.numeric(date) + wday(date, label = TRUE) + month(date, label = TRUE) +
ad_spend + temperature + competitor_campaign + is_holiday,
data = enhanced_train)
model_enhanced_rf <- rand_forest(trees = 500) %>%
set_engine("ranger") %>%
set_mode("regression") %>%
fit(sales ~ as.numeric(date) + wday(date, label = TRUE) + month(date, label = TRUE) +
ad_spend + temperature + competitor_campaign + is_holiday,
data = enhanced_train)
enhanced_models <- modeltime_table(
model_enhanced_lm,
model_enhanced_rf
)
enhanced_accuracy <- enhanced_models %>%
modeltime_accuracy(enhanced_test)
print("外部変数を含むモデルの精度:")
print(enhanced_accuracy)
if (require(vip, quietly = TRUE)) {
feature_importance <- model_enhanced_rf %>%
extract_fit_parsnip() %>%
vip::vi() %>%
arrange(desc(Importance))
print("特徴量重要度:")
print(feature_importance)
}
residuals_analysis <- enhanced_models %>%
modeltime_forecast(new_data = enhanced_test, actual_data = enhanced_train) %>%
filter(.model_desc == "RANGER", .key == "prediction") %>%
left_join(enhanced_test %>% select(date, actual_sales = sales), by = c(".index" = "date")) %>%
mutate(residuals = actual_sales - .value)
residual_stats <- residuals_analysis %>%
summarise(
mean_residual = mean(residuals, na.rm = TRUE),
sd_residual = sd(residuals, na.rm = TRUE),
rmse = sqrt(mean(residuals^2, na.rm = TRUE)),
mae = mean(abs(residuals), na.rm = TRUE)
)
print("残差統計:")
print(residual_stats)
prediction_intervals <- enhanced_models %>%
filter(.model_id == 2) %>%
modeltime_forecast(
new_data = enhanced_test,
actual_data = enhanced_train,
conf_interval = 0.95
)
interval_summary <- prediction_intervals %>%
filter(.key == "prediction") %>%
summarise(
avg_prediction = mean(.value),
avg_lower = mean(.conf_lo),
avg_upper = mean(.conf_hi),
interval_width = mean(.conf_hi - .conf_lo)
)
print("予測区間の統計:")
print(interval_summary)