ggplot2の真価は、実際のデータを使って美しく洞察に富んだグラフを作成することにあります。ここでは、よく使われるグラフパターンと、プロフェッショナルな仕上げのテクニックを学びましょう。
                    
                    
                    時系列データの可視化
                    
                        時系列データは、ビジネスや研究において最も重要なデータタイプの一つです。トレンド、季節性、異常値を効果的に可視化する方法を見てみましょう。
                    
                    
                    
                        
                        
                            
                            library(tidyverse)
                            library(lubridate)
                            
                            
                            # Simulated monthly sales for three products, reshaped to long format
                            # (one row per date x product) so `product` can be mapped to colour.
                            # The values are random draws: call set.seed() first if the figure
                            # has to be reproducible.
                            sales_ts <- tibble(
                              date = seq(as.Date("2020-01-01"), as.Date("2023-12-31"), by = "month"),
                              # tibble() evaluates its arguments in order, so the series length is
                              # taken from the `date` column instead of a hard-coded 48 that would
                              # go stale as soon as the date range changes.
                              product_a = cumsum(rnorm(length(.data$date), mean = 50, sd = 20)) + 1000,
                              product_b = cumsum(rnorm(length(.data$date), mean = 30, sd = 15)) + 800,
                              product_c = cumsum(rnorm(length(.data$date), mean = 25, sd = 12)) + 600
                            ) %>%
                              pivot_longer(
                                cols = starts_with("product"),
                                names_to = "product",
                                values_to = "sales"
                              )
                            
                            
                            # Line chart of sales over time, one coloured series per product,
                            # with point markers on every observation.
                            # Requires ggplot2 >= 3.4.0 (for the `linewidth` aesthetic).
                            timeseries_plot <- ggplot(sales_ts, aes(x = date, y = sales, color = product)) +
                              # Line thickness is `linewidth`; `size` on line geoms is deprecated
                              # and raises a warning on current ggplot2.
                              geom_line(linewidth = 1.2, alpha = 0.8) +
                              geom_point(size = 2, alpha = 0.6) +
                              scale_x_date(
                                date_breaks = "6 months",
                                date_labels = "%Y年%m月",
                                expand = expansion(mult = c(0.02, 0.02))
                              ) +
                              scale_y_continuous(
                                # label_comma() supersedes comma_format(); same output.
                                labels = scales::label_comma(suffix = "万円"),
                                expand = expansion(mult = c(0, 0.1))
                              ) +
                              scale_color_manual(
                                values = c(
                                  "product_a" = "#00ffff",
                                  "product_b" = "#ff00ff",
                                  "product_c" = "#39ff14"
                                ),
                                # Labels are keyed by level, like `values`, so the two cannot
                                # drift out of step if a level is added or reordered.
                                labels = c(
                                  "product_a" = "製品A",
                                  "product_b" = "製品B",
                                  "product_c" = "製品C"
                                )
                              ) +
                              labs(
                                title = "製品別売上高の推移",
                                subtitle = "2020年1月〜2023年12月",
                                x = "年月",
                                y = "売上高",
                                color = "製品",
                                caption = "データ:社内売上システム"
                              ) +
                              theme_minimal() +
                              theme(
                                # Rotate the date labels so the 6-month breaks do not overlap.
                                axis.text.x = element_text(angle = 45, hjust = 1),
                                legend.position = "bottom",
                                plot.title = element_text(size = 16, face = "bold")
                              )
                        
                     
                    
                    分布の比較と統計可視化
                    
                        データの分布を理解することは、統計分析の基本です。複数グループの分布を効果的に比較する方法を学びましょう。
                    
                    
                    
                        
                        
                            
                            # Simulated survey: 250 respondents in each of four age groups.
                            # Income is drawn from a normal distribution whose mean and sd
                            # depend on the age group; satisfaction is sampled uniformly.
                            survey_data <- tibble(
                              age_group = rep(c("20代", "30代", "40代", "50代"), each = 250),
                              # One vectorised draw; the per-group mean/sd are expanded to match
                              # the row order of `age_group`.
                              income = rnorm(
                                1000,
                                mean = rep(c(350, 450, 550, 600), each = 250),
                                sd = rep(c(80, 100, 120, 150), each = 250)
                              ),
                              satisfaction = sample(c("低", "中", "高"), size = 1000, replace = TRUE)
                            )
                            
                            
                            # Income distribution per age group: violin (density shape) with a
                            # narrow boxplot (quartiles, outliers) and a mean marker on top.
                            distribution_plot <- ggplot(survey_data, aes(x = age_group, y = income)) +
                              geom_violin(
                                aes(fill = age_group),
                                alpha = 0.7,
                                scale = "width"
                              ) +
                              geom_boxplot(
                                width = 0.2,
                                alpha = 0.9,
                                outlier.color = "red",
                                outlier.size = 2
                              ) +
                              # Mean marker. A plain white point was invisible here: it sits on
                              # the white-filled boxplot under theme_minimal(). Shape 23 is a
                              # fillable diamond, so it keeps the white fill but gains a dark
                              # outline that stays visible.
                              stat_summary(
                                fun = mean,
                                geom = "point",
                                shape = 23,
                                fill = "white",
                                color = "black",
                                size = 3
                              ) +
                              scale_fill_manual(
                                # Keyed by level so colours stay attached to their age group.
                                values = c(
                                  "20代" = "#00ffff",
                                  "30代" = "#39ff14",
                                  "40代" = "#ff6600",
                                  "50代" = "#ff00ff"
                                )
                              ) +
                              scale_y_continuous(
                                # label_comma() supersedes comma_format(); same output.
                                labels = scales::label_comma(suffix = "万円")
                              ) +
                              labs(
                                title = "年代別年収分布の比較",
                                subtitle = "バイオリンプロット + 箱ひげ図 + 平均値",
                                x = "年代",
                                y = "年収",
                                fill = "年代"
                              ) +
                              theme_minimal() +
                              # The x axis already names each group, so the fill legend is dropped.
                              theme(legend.position = "none")
                        
                     
                    
                    相関関係とパターンの発見
                    
                        散布図は変数間の関係を理解するための最も強力なツールの一つです。回帰線、信頼区間、グループ分けを組み合わせた高度な分析を行いましょう。
                    
                    
                    
                        
                        
                            
                            # mtcars as a tibble, with the row names kept as a `model` column and
                            # two derived labels: transmission type and a fuel-efficiency class.
                            car_data <- mtcars %>%
                              as_tibble(rownames = "model") %>%
                              mutate(
                                transmission = if_else(am == 1, "マニュアル", "オートマ"),
                                # Conditions are checked top to bottom; the first match wins.
                                efficiency_class = case_when(
                                  mpg >= 25 ~ "高効率",
                                  mpg >= 20 ~ "中効率",
                                  TRUE ~ "低効率"
                                )
                              )
                            
                            
                            # Weight vs. fuel economy on a dark canvas. Layers, back to front:
                            # 2D density contours, linear fit with confidence band, the cars
                            # themselves (colour = transmission, size = horsepower, shape =
                            # efficiency class) and name labels for the extreme cars.
                            correlation_plot <- ggplot(car_data, aes(x = wt, y = mpg)) +
                              stat_density_2d(alpha = 0.3, color = "gray70") +
                              geom_smooth(
                                method = "lm",
                                # Spelling out the formula silences the "`geom_smooth()` using
                                # formula" message that is otherwise printed on every draw.
                                formula = y ~ x,
                                color = "#ff00ff",
                                fill = "#ff00ff",
                                alpha = 0.2
                              ) +
                              geom_point(
                                aes(color = transmission, size = hp, shape = efficiency_class),
                                alpha = 0.8
                              ) +
                              # Label only the outliers (very economical or very heavy cars).
                              geom_text(
                                data = car_data %>% filter(mpg > 30 | wt > 5),
                                aes(label = model),
                                nudge_y = 1,
                                size = 3,
                                color = "white"
                              ) +
                              scale_color_manual(
                                values = c("マニュアル" = "#00ffff", "オートマ" = "#39ff14")
                              ) +
                              scale_size_continuous(range = c(3, 8)) +
                              labs(
                                title = "自動車の重量と燃費の関係",
                                subtitle = "変速機タイプ、馬力、効率クラス別分析",
                                x = "重量 (1000 lbs)",
                                y = "燃費 (mpg)",
                                color = "変速機",
                                size = "馬力",
                                shape = "効率クラス"
                              ) +
                              theme_dark() +
                              theme(
                                # color = NA removes the white border inherited from the base theme.
                                plot.background = element_rect(fill = "black", color = NA),
                                panel.background = element_rect(fill = "gray10"),
                                # theme_dark() keeps dark text. On the black plot background the
                                # title, axis titles and axis text were unreadable, so switch all
                                # text to light colours and darken the legend to match.
                                text = element_text(color = "white"),
                                axis.text = element_text(color = "gray80"),
                                legend.background = element_rect(fill = "black", color = NA),
                                legend.key = element_rect(fill = "gray10", color = NA),
                                legend.position = "bottom"
                              )
                        
                     
                    
                    ファセットによる多次元分析
                    
                        ファセットは、データの複数の側面を同時に可視化する強力な機能です。Small Multiplesの原理により、複雑なパターンを理解しやすくします。
                    
                    
                    
                        
                        
                            
                            # Synthetic sales table: expand_grid() crosses the three vectors, giving
                            # one row for every region x quarter x product category combination.
                            # `sales` and `profit_margin` are random draws, so results change from
                            # run to run unless a seed is set beforehand.
                            business_data <- expand_grid(
                              region = c("東京", "大阪", "名古屋", "福岡"),
                              quarter = c("Q1", "Q2", "Q3", "Q4"),
                              product_category = c("電子機器", "家具", "衣料品")
                            ) %>%
                              mutate(
                                # Every branch draws n() values (one per row of the table);
                                # case_when() then keeps, row by row, the draw from the first
                                # branch whose condition is TRUE. Mean and sd differ per region.
                                sales = case_when(
                                  region == "東京" ~ rnorm(n(), mean = 1500, sd = 300),
                                  region == "大阪" ~ rnorm(n(), mean = 1200, sd = 250),
                                  region == "名古屋" ~ rnorm(n(), mean = 800, sd = 200),
                                  # Fallback for any region not matched above (here: 福岡).
                                  TRUE ~ rnorm(n(), mean = 600, sd = 150)
                                ),
                                # Same pattern with uniform draws: the margin range depends on
                                # the product category.
                                profit_margin = case_when(
                                  product_category == "電子機器" ~ runif(n(), 0.15, 0.25),
                                  product_category == "家具" ~ runif(n(), 0.30, 0.45),
                                  # Fallback for the remaining category (here: 衣料品).
                                  TRUE ~ runif(n(), 0.50, 0.70)
                                )
                              )
                            
                            
                            # Small multiples: one panel per region, quarters on the x axis and
                            # dodged bars per product category, each bar labelled with its value.
                            facet_plot <- ggplot(business_data, aes(x = quarter, y = sales)) +
                              geom_col(
                                aes(fill = product_category),
                                # Explicit width so the bars and their labels share one dodge.
                                position = position_dodge(width = 0.9),
                                alpha = 0.8
                              ) +
                              geom_text(
                                aes(label = scales::comma(sales, accuracy = 1), group = product_category),
                                position = position_dodge(width = 0.9),
                                vjust = -0.5,
                                size = 3,
                                # The labels sit above the bars, on theme_minimal()'s white
                                # background, so white text was invisible; use a dark grey.
                                color = "gray20"
                              ) +
                              # Free y scales: regions differ a lot in sales level.
                              facet_wrap(~region, scales = "free_y", ncol = 2) +
                              scale_fill_manual(
                                values = c("電子機器" = "#00ffff", "家具" = "#ff6600", "衣料品" = "#39ff14")
                              ) +
                              scale_y_continuous(
                                # label_comma() supersedes comma_format(); same output.
                                labels = scales::label_comma(suffix = "万円"),
                                # Extra headroom at the top so the label above the tallest bar
                                # of each panel is not clipped.
                                expand = expansion(mult = c(0.05, 0.15))
                              ) +
                              labs(
                                title = "地域別・四半期別・商品カテゴリ別売上分析",
                                subtitle = "複数次元での売上パフォーマンス比較",
                                x = "四半期",
                                y = "売上高",
                                fill = "商品カテゴリ"
                              ) +
                              theme_minimal() +
                              theme(
                                strip.text = element_text(size = 12, face = "bold"),
                                legend.position = "bottom"
                              )
                        
                     
                    
                    
                    🎯 レーダーチャート:多次元パフォーマンス比較
                    
                        
                        
                            
                            
                            library(ggplot2)  
                            library(dplyr)    
                            
                            
                            # Skill scores in wide form: one row per skill, one column per employee.
                            skill_data <- data.frame(
                              skill = c(
                                "プログラミング", "データ分析", "コミュニケーション",
                                "プロジェクト管理", "創造性", "問題解決"
                              ),
                              employee_A = c(9, 7, 6, 4, 8, 9),
                              employee_B = c(6, 9, 8, 9, 5, 7),
                              employee_C = c(7, 5, 9, 8, 9, 6)
                            )

                            # Long form for plotting: one row per skill x employee. The
                            # "employee_" column prefix is stripped while pivoting and replaced
                            # by the display prefix.
                            skill_long <- skill_data %>%
                              tidyr::pivot_longer(
                                cols = -skill,
                                names_to = "employee",
                                names_prefix = "employee_",
                                values_to = "score"
                              ) %>%
                              mutate(employee = paste0("従業員", employee))
                            
                            
                            
                            # Radar-style chart: skills around the circle, score as the radius,
                            # one closed shape per employee.
                            # Requires ggplot2 >= 3.4.0 (for the `linewidth` aesthetic).
                            radar_plot <- ggplot(
                              skill_long,
                              aes(x = skill, y = score, color = employee, group = employee)
                            ) +
                              # The polygon supplies both the translucent fill and the outline.
                              # geom_line() was dropped: under coord_polar() it does not join the
                              # last skill back to the first, which left the thick outline open.
                              # `alpha` only affects the polygon fill, so the outline stays opaque.
                              geom_polygon(aes(fill = employee), alpha = 0.2, linewidth = 1.2) +
                              geom_point(size = 3) +
                              scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, 2)) +
                              scale_color_manual(values = c("#00ffff", "#ff1493", "#39ff14")) +
                              scale_fill_manual(values = c("#00ffff", "#ff1493", "#39ff14")) +
                              coord_polar() +
                              theme_minimal() +
                              theme(
                                # theme_minimal() uses dark text, which made the title and legend
                                # unreadable on the gray10 background set below.
                                text = element_text(color = "white"),
                                plot.background = element_rect(fill = "gray10"),
                                panel.background = element_rect(fill = "gray10"),
                                axis.text.x = element_text(color = "white", size = 10),
                                axis.text.y = element_text(color = "gray70"),
                                panel.grid = element_line(color = "gray30")
                              ) +
                              labs(
                                title = "従業員スキル評価レーダーチャート",
                                subtitle = "6つの主要スキル領域での比較",
                                # Suppress the raw variable names ("skill", "score") as axis titles,
                                # which the light text colour would otherwise make visible.
                                x = NULL,
                                y = NULL,
                                color = "従業員",
                                fill = "従業員"
                              )
                        
                     
                    
                    
                    📊 積み上げ棒グラフ:構成比の可視化
                    
                        
                        
                            
                            library(ggplot2)
                            library(dplyr)
                            
                            
                            # Quarterly sales per product in long form: three consecutive rows
                            # (one per product) for each quarter.
                            sales_composition <- data.frame(
                              quarter = rep(paste0("Q", 1:4), each = 3),
                              product = rep(c("スマートフォン", "タブレット", "PC"), times = 4),
                              sales = c(
                                45, 25, 30, # Q1
                                50, 28, 22, # Q2
                                48, 32, 20, # Q3
                                55, 30, 15  # Q4
                              )
                            )
                            
                            
                            # Stacked bars in absolute values: bar height is the quarter's total
                            # and each segment carries its own value as a centred label.
                            stacked_absolute <- ggplot(
                              sales_composition,
                              aes(x = quarter, y = sales, fill = product)
                            ) +
                              geom_col(width = 0.7, alpha = 0.8) +
                              geom_text(
                                aes(label = paste0(sales, "万円")),
                                # vjust = 0.5 places each label in the middle of its segment.
                                position = position_stack(vjust = 0.5),
                                size = 3,
                                color = "white"
                              ) +
                              scale_fill_manual(
                                values = c(
                                  "スマートフォン" = "#00ffff",
                                  "タブレット" = "#ff6600",
                                  "PC" = "#39ff14"
                                )
                              ) +
                              labs(
                                title = "四半期別売上構成(絶対値)",
                                x = "四半期",
                                y = "売上(万円)",
                                fill = "製品"
                              ) +
                              theme_dark()
                            
                            
                            # Stacked bars as shares: every bar is normalised to 100 %, so only
                            # the product mix of each quarter is compared.
                            stacked_percentage <- ggplot(
                              sales_composition,
                              aes(x = quarter, y = sales, fill = product)
                            ) +
                              # position_fill() rescales each stack to a total of 1.
                              geom_col(position = position_fill(), width = 0.7, alpha = 0.8) +
                              scale_y_continuous(labels = scales::percent) +
                              scale_fill_manual(
                                values = c(
                                  "スマートフォン" = "#00ffff",
                                  "タブレット" = "#ff6600",
                                  "PC" = "#39ff14"
                                )
                              ) +
                              labs(
                                title = "四半期別売上構成(比率)",
                                x = "四半期",
                                y = "構成比",
                                fill = "製品"
                              ) +
                              theme_dark()
                            
                            
                            # Draw the absolute and the percentage chart next to each other
                            # (two columns, one row) on the current graphics device.
                            library(gridExtra)
                            grid.arrange(stacked_absolute, stacked_percentage, ncol = 2)