第7章: lubridate

日付・時間データの完全マスター

📅 日付解析と操作 ⏰ 時間計算 🌍 タイムゾーン処理

🕐 lubridateの基本: 日付・時間の作成と解析

lubridateパッケージは、Rにおける日付・時間データの操作を直感的かつ効率的に行うためのツールです。様々な形式の日付文字列を簡単に解析し、時間計算を行うことができます。

日付の作成と解析

基本的な日付操作

                    library(lubridate)
                    library(dplyr)
                    
                    # Capture the current instant and the current calendar date.
                    current_time <- now()
                    current_date <- today()

                    paste("現在時刻:", current_time) %>% print()
                    paste("今日の日付:", current_date) %>% print()

                    # ymd(): year-month-day text, with "-", "/" or no separator.
                    dates_ymd <- c("2023-12-25", "2023/12/31", "20231201")
                    parsed_ymd <- dates_ymd %>% ymd()
                    print(parsed_ymd)

                    # dmy(): the same idea for day-month-year ordering.
                    dates_dmy <- c("25-12-2023", "31/12/2023", "01122023")
                    parsed_dmy <- dates_dmy %>% dmy()
                    print(parsed_dmy)

                    # mdy(): month-day-year ordering.
                    dates_mdy <- c("12-25-2023", "12/31/2023")
                    parsed_mdy <- dates_mdy %>% mdy()
                    print(parsed_mdy)

                    # ymd_hms(): date plus a time of day.
                    datetime_strings <- c(
                      "2023-12-25 14:30:00", "2023/12/31 23:59:59",
                      "2023-01-01 00:00:00"
                    )
                    parsed_datetime <- datetime_strings %>% ymd_hms()
                    print(parsed_datetime)

日付解析結果

[1] "現在時刻: 2023-12-15 10:30:45" [1] "今日の日付: 2023-12-15" [1] "2023-12-25" "2023-12-31" "2023-12-01" [1] "2023-12-25" "2023-12-31" "2023-12-01" [1] "2023-12-25" "2023-12-31" [1] "2023-12-25 14:30:00 UTC" "2023-12-31 23:59:59 UTC" "2023-01-01 00:00:00 UTC"

日付・時間の構成要素の抽出

日付構成要素の取得

                    # Reference timestamp used for every extraction below.
                    sample_datetime <- ymd_hms("2023-12-25 14:30:45")

                    # Printed label -> extracted component, in output order:
                    # calendar fields, clock fields, weekday / week / day-of-year,
                    # quarter / half-year, and finally the full month name.
                    datetime_parts <- list(
                      "年:" = year(sample_datetime),
                      "月:" = month(sample_datetime),
                      "日:" = day(sample_datetime),
                      "時:" = hour(sample_datetime),
                      "分:" = minute(sample_datetime),
                      "秒:" = second(sample_datetime),
                      "曜日:" = wday(sample_datetime, label = TRUE, abbr = FALSE),
                      "年の週番号:" = week(sample_datetime),
                      "年の日番号:" = yday(sample_datetime),
                      "四半期:" = quarter(sample_datetime),
                      "半期:" = semester(sample_datetime),
                      "月名:" = month(sample_datetime, label = TRUE, abbr = FALSE)
                    )

                    for (part_label in names(datetime_parts)) {
                      print(paste(part_label, datetime_parts[[part_label]]))
                    }

構成要素抽出結果

[1] "年: 2023" [1] "月: 12" [1] "日: 25" [1] "時: 14" [1] "分: 30" [1] "秒: 45" [1] "曜日: Monday" [1] "年の週番号: 52" [1] "年の日番号: 359" [1] "四半期: 4" [1] "半期: 2" [1] "月名: December"

2023年12月カレンダー

⏰ 時間間隔と期間の計算

期間（Duration）とインターバル（Interval）

時間間隔の計算

                    # First and last second of calendar year 2023.
                    start_date <- ymd_hms("2023-01-01 00:00:00")
                    end_date <- ymd_hms("2023-12-31 23:59:59")

                    # Subtracting two date-times yields a difftime.
                    time_difference <- end_date - start_date
                    print(paste("時間差:", time_difference))

                    # The same span expressed as a lubridate Duration.
                    duration_seconds <- as.duration(time_difference)
                    print(paste("秒数:", duration_seconds))

                    # Duration constructors for common units.
                    one_year <- dyears(1)
                    one_month <- dmonths(1)
                    one_week <- dweeks(1)
                    one_day <- ddays(1)
                    one_hour <- dhours(1)

                    duration_labels <- list(
                      "1年 =" = one_year,
                      "1ヶ月 =" = one_month,
                      "1週間 =" = one_week
                    )
                    for (unit_label in names(duration_labels)) {
                      print(paste(unit_label, duration_labels[[unit_label]], "秒"))
                    }

                    # An Interval is anchored to concrete start and end dates;
                    # %--% is the operator form of interval().
                    project_start <- ymd("2023-04-01")
                    project_end <- ymd("2023-09-30")
                    project_interval <- project_start %--% project_end

                    print(paste("プロジェクト期間:", project_interval))
                    print(paste("プロジェクト日数:", project_interval / ddays(1)))
                    print(paste("プロジェクト週数:", project_interval / dweeks(1)))
                

時間計算結果

[1] "時間差: 364.999988425926 days" [1] "秒数: 31535999s (~365 days)" [1] "1年 = 31557600s (~365.25 days) 秒" [1] "1ヶ月 = 2629746s (~30.44 days) 秒" [1] "1週間 = 604800s (~1 weeks) 秒" [1] "プロジェクト期間: 2023-04-01 UTC--2023-09-30 UTC" [1] "プロジェクト日数: 182" [1] "プロジェクト週数: 26"

日付の算術演算

日付の加算・減算

                    # Anchor date for the additions and subtractions below.
                    base_date <- ymd("2023-06-15")

                    # Adding periods (calendar units) to a date.
                    future_dates <- list(
                      "1週間後" = base_date + weeks(1),
                      "1ヶ月後" = base_date + months(1),
                      "3ヶ月後" = base_date + months(3),
                      "1年後" = base_date + years(1)
                    )

                    for (period in names(future_dates)) {
                      print(paste(period, ":", future_dates[[period]]))
                    }

                    # Subtracting periods from a date.
                    past_dates <- list(
                      "1週間前" = base_date - weeks(1),
                      "1ヶ月前" = base_date - months(1),
                      "6ヶ月前" = base_date - months(6),
                      "1年前" = base_date - years(1)
                    )

                    for (period in names(past_dates)) {
                      print(paste(period, ":", past_dates[[period]]))
                    }

                    # Month-end handling. Plain `+ months(n)` returns NA when the
                    # target day does not exist (Jan 31 + 1 month = "Feb 31").
                    # `%m+%` rolls back to the last valid day of the target month,
                    # giving Feb 28, Mar 31 and Apr 30 here.
                    month_end_dates <- ymd("2023-01-31") %m+% months(1:3)

                    print("月末日の調整:")
                    print(month_end_dates)
                

日付演算結果

[1] "1週間後 : 2023-06-22" [1] "1ヶ月後 : 2023-07-15" [1] "3ヶ月後 : 2023-09-15" [1] "1年後 : 2024-06-15" [1] "1週間前 : 2023-06-08" [1] "1ヶ月前 : 2023-05-15" [1] "6ヶ月前 : 2022-12-15" [1] "1年前 : 2022-06-15" [1] "月末日の調整:" [1] "2023-02-28" "2023-03-31" "2023-04-30"

🌍 タイムゾーンの処理

タイムゾーン変換

                    # paste() converts a POSIXct with as.character(), which omits
                    # the zone abbreviation. Format explicitly so each converted
                    # time carries its label (JST, EST, EDT, ...).
                    clock_format <- "%Y-%m-%d %H:%M:%S"
                    zone_format <- paste(clock_format, "%Z")

                    # Build a date-time in UTC; printed as a bare clock time.
                    utc_time <- ymd_hms("2023-12-25 12:00:00", tz = "UTC")
                    print(paste("UTC時刻:", format(utc_time, clock_format)))

                    # with_tz() keeps the instant and changes only the displayed
                    # zone.
                    timezones <- list(
                      "東京" = with_tz(utc_time, "Asia/Tokyo"),
                      "ニューヨーク" = with_tz(utc_time, "America/New_York"),
                      "ロンドン" = with_tz(utc_time, "Europe/London"),
                      "シドニー" = with_tz(utc_time, "Australia/Sydney"),
                      "ロサンゼルス" = with_tz(utc_time, "America/Los_Angeles")
                    )

                    for (city in names(timezones)) {
                      print(paste(
                        city,
                        ":",
                        format(timezones[[city]], zone_format)
                      ))
                    }

                    # A wall-clock time declared directly in a local zone.
                    tokyo_time <- ymd_hms("2023-12-25 21:00:00", tz = "Asia/Tokyo")
                    print(paste("東京時刻:", format(tokyo_time, zone_format)))

                    # The same instant shown in other zones.
                    print(paste(
                      "UTC時刻:",
                      format(with_tz(tokyo_time, "UTC"), zone_format)
                    ))
                    print(paste(
                      "NY時刻:",
                      format(with_tz(tokyo_time, "America/New_York"), zone_format)
                    ))

                    # Daylight saving time: the same UTC hour maps to different
                    # New York clock times in summer (EDT) and winter (EST).
                    summer_date <- ymd_hms("2023-07-15 12:00:00", tz = "UTC")
                    winter_date <- ymd_hms("2023-01-15 12:00:00", tz = "UTC")

                    print("サマータイムの影響:")
                    print(paste(
                      "夏のNY時刻:",
                      format(with_tz(summer_date, "America/New_York"), zone_format)
                    ))
                    print(paste(
                      "冬のNY時刻:",
                      format(with_tz(winter_date, "America/New_York"), zone_format)
                    ))
                

タイムゾーン変換結果

[1] "UTC時刻: 2023-12-25 12:00:00" [1] "東京 : 2023-12-25 21:00:00 JST" [1] "ニューヨーク : 2023-12-25 07:00:00 EST" [1] "ロンドン : 2023-12-25 12:00:00 GMT" [1] "シドニー : 2023-12-25 23:00:00 AEDT" [1] "ロサンゼルス : 2023-12-25 04:00:00 PST" [1] "東京時刻: 2023-12-25 21:00:00 JST" [1] "UTC時刻: 2023-12-25 12:00:00 UTC" [1] "NY時刻: 2023-12-25 07:00:00 EST" [1] "サマータイムの影響:" [1] "夏のNY時刻: 2023-07-15 08:00:00 EDT" [1] "冬のNY時刻: 2023-01-15 07:00:00 EST"

📊 実践的な時系列データ分析

売上データの時系列分析

                    # Synthetic daily sales for 2023: a yearly sine wave around
                    # 1000 plus Gaussian noise. The date vector is built once and
                    # reused, and the noise length follows it, so changing the
                    # range cannot leave the columns at mismatched lengths.
                    set.seed(123)
                    sales_data <- tibble(
                      date = seq(ymd("2023-01-01"), ymd("2023-12-31"), by = "day"),
                      sales = 1000 +
                        200 * sin(2 * pi * yday(date) / 365) +
                        rnorm(length(date), 0, 50)
                    )

                    # Derive calendar attributes used by the aggregations below.
                    sales_enhanced <- sales_data %>%
                      mutate(
                        year = year(date),
                        month = month(date),
                        month_name = month(date, label = TRUE, abbr = FALSE),
                        day_of_week = wday(date, label = TRUE, abbr = FALSE),
                        quarter = quarter(date),
                        week_of_year = week(date),
                        # Pin week_start so Sunday = 1 and Saturday = 7 whatever
                        # the global "lubridate.week.start" option is set to.
                        is_weekend = wday(date, week_start = 7) %in% c(1, 7)
                      )
                    
                    # Per-month totals and summary statistics.
                    monthly_sales <- sales_enhanced %>%
                      group_by(year, month, month_name) %>%
                      summarise(
                        total_sales = sum(sales),
                        avg_sales = mean(sales),
                        max_sales = max(sales),
                        min_sales = min(sales),
                        days_count = n(),
                        .groups = "drop"
                      )

                    monthly_sales %>%
                      head(6) %>%
                      print()

                    # Per-weekday averages, best-performing day first.
                    weekday_sales <- sales_enhanced %>%
                      group_by(day_of_week, is_weekend) %>%
                      summarise(
                        avg_sales = mean(sales),
                        total_sales = sum(sales),
                        days_count = n(),
                        .groups = "drop"
                      ) %>%
                      arrange(desc(avg_sales))

                    print(weekday_sales)

                    # Per-quarter totals plus the single best day in each quarter.
                    quarterly_performance <- sales_enhanced %>%
                      group_by(quarter) %>%
                      summarise(
                        quarter_sales = sum(sales),
                        avg_daily_sales = mean(sales),
                        peak_day = date[which.max(sales)],
                        peak_sales = max(sales),
                        .groups = "drop"
                      )

                    print(quarterly_performance)
                

時系列分析結果

# A tibble: 6 × 8 year month month_name total_sales avg_sales max_sales min_sales days_count <dbl> <dbl> <ord> <dbl> <dbl> <dbl> <dbl> <int> 1 2023 1 January 30856. 995. 1089. 879. 31 2 2023 2 February 27234. 973. 1067. 856. 28 3 2023 3 March 30298. 977. 1098. 845. 31 4 2023 4 April 30645. 1022. 1134. 921. 30 5 2023 5 May 32187. 1038. 1156. 952. 31 6 2023 6 June 33456. 1115. 1234. 987. 30 # A tibble: 7 × 5 day_of_week is_weekend avg_sales total_sales days_count <ord> <lgl> <dbl> <dbl> <int> 1 Friday FALSE 1015. 53785. 53 2 Thursday FALSE 1009. 52458. 52 3 Wednesday FALSE 1008. 52404. 52 4 Tuesday FALSE 1004. 52208. 52 5 Monday FALSE 998. 51896. 52 6 Sunday TRUE 995. 51740. 52 7 Saturday TRUE 992. 51658. 52 # A tibble: 4 × 5 quarter quarter_sales avg_daily_sales peak_day peak_sales <int> <dbl> <dbl> <date> <dbl> 1 1 88388. 983. 2023-03-15 1098. 2 2 96288. 1048. 2023-06-21 1234. 3 3 98567. 1071. 2023-08-18 1287. 4 4 81906. 891. 2023-10-12 1145.

月別売上推移（2023年）

⚡ 高度な日付操作テクニック

営業日の計算と祝日処理

                    # Test whether dates are business days.
                    #
                    # A business day is Monday through Friday and, when `holidays`
                    # is supplied, not one of those holiday dates.
                    #
                    # Args:
                    #   date:     Date or date-time vector (any length).
                    #   holidays: Optional vector of dates to exclude; NULL (the
                    #             default) applies the weekday rule only.
                    # Returns:
                    #   Logical vector, one element per element of `date`.
                    is_business_day <- function(date, holidays = NULL) {
                      # Pin week_start so Monday = 2 and Friday = 6 regardless of
                      # the global "lubridate.week.start" option.
                      weekday <- wday(date, week_start = 7)
                      # Elementwise `&`: scalar `&&` errors on vectors in R >= 4.3.
                      on_weekday <- weekday >= 2 & weekday <= 6
                      if (is.null(holidays)) {
                        return(on_weekday)
                      }
                      on_weekday & !(as_date(date) %in% as_date(holidays))
                    }

                    
                    # Enumerate every day of December 2023, then keep the
                    # business days.
                    start_date <- ymd("2023-12-01")
                    end_date <- ymd("2023-12-31")

                    date_range <- seq(from = start_date, to = end_date, by = "day")

                    # vapply() visits each Date on its own and pins the result to
                    # one logical per day.
                    is_open <- vapply(date_range, is_business_day, logical(1))
                    business_days <- date_range[is_open]

                    print(paste("12月の営業日数:", length(business_days)))
                    print(paste("12月の総日数:", length(date_range)))

                    
                    # Period boundaries (month, quarter, year) for a few sample
                    # dates.
                    sample_dates <- ymd(c("2023-03-15", "2023-07-22", "2023-11-03"))

                    month_boundaries <- tibble(
                      original_date = sample_dates,
                      month_start = floor_date(sample_dates, unit = "month"),
                      # First day of the following month, minus one day.
                      month_end = ceiling_date(sample_dates, unit = "month") -
                        days(1),
                      quarter_start = floor_date(sample_dates, unit = "quarter"),
                      year_start = floor_date(sample_dates, unit = "year")
                    )

                    print(month_boundaries)

                    
                    # Age in completed years.
                    #
                    # Args:
                    #   birth_date:     Date(s) of birth.
                    #   reference_date: Date at which to measure the age;
                    #                   defaults to today().
                    # Returns:
                    #   Number of whole years from birth_date to reference_date.
                    calculate_age <- function(birth_date, reference_date = today()) {
                      # %--% builds the interval; integer division by a one-year
                      # period counts only completed years.
                      (birth_date %--% reference_date) %/% years(1)
                    }
                    
                    # Ages for a few sample birthdays, measured as of today.
                    birth_dates <- ymd(c("1990-05-15", "1985-11-22", "2000-03-08"))

                    # calculate_age() is vectorised, so one call covers every date.
                    ages <- calculate_age(birth_dates)
                    print(paste("年齢:", ages))

                    
                    # Days and weeks remaining until each event, relative to
                    # today. Negative values mean the event has already passed.
                    events <- tibble(
                      event_name = c("クリスマス", "新年", "バレンタイン"),
                      event_date = ymd(c("2023-12-25", "2024-01-01", "2024-02-14"))
                    )
                    events <- mutate(
                      events,
                      days_until = as.numeric(
                        difftime(event_date, today(), units = "days")
                      ),
                      weeks_until = round(days_until / 7, digits = 1)
                    )

                    print(events)
                

営業日計算の例	開始日	終了日	営業日数
プロジェクト期間	2023-12-01	2023-12-31	21日
第1四半期	2023-01-01	2023-03-31	65日
第2四半期	2023-04-01	2023-06-30	65日
第3四半期	2023-07-01	2023-09-30	65日

📈 時系列分析と予測モデリングの実践

lubridateの基本操作を習得したら、時系列データの統計分析と予測モデリングに挑戦しましょう。ビジネスの未来を見据えた意思決定には、時系列の高度な分析技法が不可欠です。

⏰ 時系列の特徴量エンジニアリング

time_series_feature_engineering.R

                        # 時系列特徴量エンジニアリング
                        library(tidyverse)
                        library(lubridate)
                        library(slider)
                        
                        # Add calendar, cyclical, rolling, lag and difference
                        # features to a time series.
                        #
                        # Args:
                        #   data:      Data frame holding the series.
                        #   date_col:  Unquoted name of the date column; rows are
                        #              sorted by it first.
                        #   value_col: Unquoted name of the numeric value column.
                        # Returns:
                        #   `data` sorted by date_col with the feature columns
                        #   appended.
                        create_time_features <- function(data, date_col, value_col) {
                          sorted <- arrange(data, {{ date_col }})

                          mutate(
                            sorted,
                            # Calendar fields.
                            year = year({{ date_col }}),
                            month = month({{ date_col }}),
                            weekday = wday({{ date_col }}, label = TRUE),
                            is_weekend = wday({{ date_col }}) %in% c(1, 7),

                            # Month mapped onto the unit circle so that December
                            # and January end up adjacent.
                            month_sin = sin(2 * pi * month / 12),
                            month_cos = cos(2 * pi * month / 12),

                            # Trailing means over the current row plus the 6 / 29
                            # preceding rows (shorter at the start of the series).
                            ma_7 = slide_dbl({{ value_col }}, mean, .before = 6),
                            ma_30 = slide_dbl({{ value_col }}, mean, .before = 29),

                            # Values 1 and 7 rows earlier.
                            lag_1 = lag({{ value_col }}, n = 1),
                            lag_7 = lag({{ value_col }}, n = 7),

                            # Absolute and percentage change versus the previous
                            # row.
                            diff_1 = {{ value_col }} - lag({{ value_col }}, n = 1),
                            pct_change = ({{ value_col }} - lag({{ value_col }}, n = 1)) /
                              lag({{ value_col }}, n = 1) * 100
                          )
                        }
                    

📊 季節性と異常値検出

seasonality_anomaly_detection.R

                        # 季節性分析と異常値検出
                        library(forecast)
                        library(anomalize)
                        library(tidyverse)
                        library(lubridate)
                        library(slider)
                        
                        # Flag anomalies with a rolling z-score.
                        #
                        # Rows are sorted by date_col first, so the rolling window
                        # follows time order rather than whatever order the input
                        # rows happen to be in.
                        #
                        # Args:
                        #   data:      Data frame holding the series.
                        #   date_col:  Unquoted name of the date column used for
                        #              ordering.
                        #   value_col: Unquoted name of the numeric value column.
                        #   window:    Rows in the trailing window, including the
                        #              current row (default 31).
                        #   threshold: Absolute z-score above which a row is
                        #              flagged (default 3).
                        # Returns:
                        #   `data` sorted by date_col with rolling_mean,
                        #   rolling_sd, z_score and is_anomaly appended.
                        detect_anomalies <- function(data, date_col, value_col,
                                                     window = 31, threshold = 3) {
                          stopifnot(
                            is.numeric(window), length(window) == 1, window >= 1,
                            is.numeric(threshold), length(threshold) == 1
                          )
                          lookback <- window - 1

                          data %>%
                            arrange({{ date_col }}) %>%
                            mutate(
                              # Trailing statistics; .env$ keeps a column that
                              # happens to share a name from shadowing the
                              # function arguments.
                              rolling_mean = slide_dbl(
                                {{ value_col }}, mean, .before = .env$lookback
                              ),
                              rolling_sd = slide_dbl(
                                {{ value_col }}, sd, .before = .env$lookback
                              ),

                              # z is NA on the first row (sd of one value), so
                              # the NA guard makes that row FALSE.
                              z_score = abs(
                                ({{ value_col }} - rolling_mean) / rolling_sd
                              ),
                              is_anomaly = !is.na(z_score) &
                                z_score > .env$threshold
                            )
                        }
                        
                        # Monthly seasonal index: each calendar month's mean
                        # divided by the overall mean of the series.
                        #
                        # Args:
                        #   data:      Data frame holding the series.
                        #   date_col:  Unquoted name of the date column.
                        #   value_col: Unquoted name of (or expression for) the
                        #              numeric value.
                        # Returns:
                        #   One row per month with avg_value and seasonal_index
                        #   (1 = average month).
                        analyze_seasonality <- function(data, date_col, value_col) {
                          # Compute the overall mean once, through the same tidy-eval
                          # path as the per-month mean. Avoids looking the column
                          # up by a deparsed name, which fails for anything but a
                          # bare column name.
                          overall_mean <- data %>%
                            ungroup() %>%
                            summarise(
                              overall = mean({{ value_col }}, na.rm = TRUE)
                            ) %>%
                            pull(overall)

                          data %>%
                            group_by(month = month({{ date_col }})) %>%
                            summarise(
                              avg_value = mean({{ value_col }}, na.rm = TRUE),
                              seasonal_index = avg_value / .env$overall_mean,
                              .groups = "drop"
                            )
                        }
                    

💡 実践的アドバイス

⏰ 時系列分析の成功法則

データの質を確保：欠損値や異常値の適切な処理が精度を左右
季節性を理解：ビジネスサイクルや外部要因の影響を考慮
複数モデルを比較：単一モデルに依存せず、アンサンブルも検討
検証を徹底：時系列分割でのクロスバリデーションを実施
解釈しやすさを重視：ビジネス判断に活用できる説明可能性を確保

これらの時系列分析と予測モデリング技法により、データに基づいた将来予測と意思決定支援が可能になります。次の章では、大容量データの効率的な読み込みと処理技法を学びましょう。