# Inline CSV sample containing several kinds of missing-value markers:
# empty fields, "N/A", "NULL", and the non-numeric token "invalid".
messy_data_content <- "name,age,salary,department,rating
田中太郎,28,450000,営業,4.5
佐藤花子,,520000,開発,N/A
鈴木一郎,45,NULL,マネジメント,5.0
高橋美咲,25,380000,,4.2
山田健太,invalid,650000,開発,3.8"

# No col_types are given, so readr guesses them; every token listed in `na`
# is read as NA.
messy_data <- read_csv(
  messy_data_content,
  na = c("", "NA", "N/A", "NULL", "invalid")
)

# Collect readr's parsing problems now; they are reported after the summary.
parsing_problems <- problems(messy_data)

print(messy_data)

# Total number of NA cells across the whole tibble.
print(paste("欠損値の数:", sum(is.na(messy_data))))

# Report parsing problems only when at least one was recorded.
if (nrow(parsing_problems) > 0) {
  print("パース問題:")
  print(parsing_problems)
}
# Re-read the same CSV text with an explicit column specification so the
# resulting column types do not depend on readr's type guessing.
safe_data <- read_csv(
  messy_data_content,
  col_types = cols(
    name = col_character(),
    age = col_integer(),
    salary = col_double(),
    department = col_character(),
    rating = col_double()
  ),
  na = c("", "NA", "N/A", "NULL", "invalid")
)
print("安全に読み込んだデータ:")
print(safe_data)

# Count NAs in every column, then reshape to one row per column.
# `across(everything(), ...)` replaces the superseded scoped verb
# `summarise_all()`; output column names and values are unchanged.
missing_summary <- safe_data %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(
    everything(),
    names_to = "column",
    values_to = "missing_count"
  )
print("列別欠損値数:")
print(missing_summary)
# Sample CSV whose price column is quoted and uses "," to group thousands.
numeric_data_content <- "product,price,quantity,date
製品A,\"1,250.50\",100,2023-12-01
製品B,\"2,350.75\",75,2023-12-02
製品C,\"890.25\",200,2023-12-03"

# Locale describing how numbers, dates, and times are written in the input.
jp_locale <- locale(
  date_format = "%Y-%m-%d",
  time_format = "%H:%M:%S",
  decimal_mark = ".",
  grouping_mark = ",",
  encoding = "UTF-8"
)

# Column types are guessed here; the locale is passed to read_csv().
numeric_data <- read_csv(numeric_data_content, locale = jp_locale)
print(numeric_data)
# Each row holds the same date written three ways:
# yyyy/mm/dd, mm/dd/yyyy, and yyyy-mm-dd.
date_format_content <- "event,date_jp,date_us,date_iso
イベント1,2023/12/01,12/01/2023,2023-12-01
イベント2,2023/12/15,12/15/2023,2023-12-15
イベント3,2024/01/05,01/05/2024,2024-01-05"

# Every date column gets its own format string; `date_iso` relies on
# col_date()'s default format.
date_data <- read_csv(
  date_format_content,
  col_types = cols(
    event = col_character(),
    date_jp = col_date("%Y/%m/%d"),
    date_us = col_date("%m/%d/%Y"),
    date_iso = col_date()
  )
)
print(date_data)
# Sample schedule: start, end, and duration columns, all written as HH:MM:SS.
time_data_content <- "task,start_time,end_time,duration
タスク1,09:30:00,11:45:30,02:15:30
タスク2,14:00:00,16:30:45,02:30:45
タスク3,10:15:30,12:00:00,01:44:30"

# All three time-like columns are parsed with col_time() and its default
# format.
time_data <- read_csv(
  time_data_content,
  col_types = cols(
    task = col_character(),
    start_time = col_time(),
    end_time = col_time(),
    duration = col_time()
  )
)
print(time_data)

# Inspect the structure of the parsed tibble (column classes and attributes).
str(time_data)