1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
| library(tidyverse)
# Seasons to load; each season is read from "<year>.csv" in the working
# directory.
years <- 2021:2025

# Player lookup table: only the Name and MLBAMID columns of name.csv are kept.
Name <- select(read_csv("name.csv"), Name, MLBAMID)

# Read every season file, tag its rows with the season in a `year` column,
# and stack the per-season tables into one data frame.
df <- years %>%
  map(function(.yr) {
    season_rows <- read_csv(paste0(.yr, ".csv"))
    mutate(season_rows, year = .yr)
  }) %>%
  bind_rows()
# Create the derived columns, then keep only the columns listed in select().
df <- df %>%
  mutate(
    # Horizontal location divided by a fixed constant.
    # NOTE(review): 0.833 looks like a strike-zone half-width in feet -- confirm.
    relative_x = plate_x / 0.833,
    # Normalise height per batter: 0 at strike_zone_bottom and 1 at
    # strike_zone_top of the zone recorded on the same row.
    zone_height = strike_zone_top - strike_zone_bottom,
    relative_z = (plate_z - strike_zone_bottom) / zone_height,
    # Count label built as "<balls>-<strikes>".
    pitch_count = paste0(balls, "-", strikes)
  ) %>%
  select(
    year, pitcher_id, bat_side, pitch_count,
    plate_x, plate_z, relative_x, relative_z,
    pitch_type, arm_angle
  )
# Spread of pitch locations as a Euclidean distance: for every pitch, how far
# (plate_x, plate_z) lies from the mean location of all pitches that share the
# same season, pitcher, batter side, count and pitch type.
df_dist <- df %>%
  group_by(year, pitcher_id, bat_side, pitch_count, pitch_type) %>%
  mutate(
    # Deviation from the group mean on each axis; missing locations are
    # ignored when the mean is taken.
    dx = plate_x - mean(plate_x, na.rm = TRUE),
    dz = plate_z - mean(plate_z, na.rm = TRUE),
    distance = sqrt(dx^2 + dz^2)
  ) %>%
  ungroup() %>%
  select(
    year, pitcher_id, bat_side, pitch_count, pitch_type,
    arm_angle, distance
  )
# Per-pitcher, per-season summary of the distance distribution.
df_dist_summary <- df_dist %>%
  group_by(year, pitcher_id) %>%
  summarise(
    mean_distance = mean(distance, na.rm = TRUE),
    sd_distance = sd(distance, na.rm = TRUE),
    min_distance = min(distance, na.rm = TRUE),
    max_distance = max(distance, na.rm = TRUE),
    IQR_distance = IQR(distance, na.rm = TRUE),
    p25_distance = quantile(distance, probs = 0.25, na.rm = TRUE),
    p75_distance = quantile(distance, probs = 0.75, na.rm = TRUE),
    # Row count for the pitcher-season; rows with a missing distance are
    # included in this count.
    pitches = n(),
    .groups = "drop"
  ) %>%
  # Rank pitchers by pitch volume within each season.
  group_by(year) %>%
  mutate(pitches_rank = percent_rank(pitches)) %>%
  ungroup() %>%
  # Keep pitcher-seasons whose within-season percent rank of pitch count is
  # at least 0.5.
  filter(pitches_rank >= 0.5) %>%
  # Attach player names; pitchers without a match in Name get NA.
  left_join(Name, by = c("pitcher_id" = "MLBAMID"))
|