-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_data.R
112 lines (98 loc) · 3.43 KB
/
generate_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
library(tidyverse)
library(scales)
message("Running generate_data.R")

# Raw player stat rows ----
df <- read.csv("player_data.csv")

# Team abbreviations; each one is compared against `recent_team` when the
# per-team summaries are built.
nfl_teams <- c(
  "ARI", "ATL", "BAL", "BUF",
  "CAR", "CHI", "CIN", "CLE",
  "DAL", "DEN", "DET", "GB",
  "HOU", "IND", "JAX", "KC",
  "LAC", "LA", "LV", "MIA",
  "MIN", "NE", "NO", "NYG",
  "NYJ", "PHI", "PIT", "SEA",
  "SF", "TB", "TEN", "WAS"
)

# Scoring weights ----
# These are the multipliers of the custom scoring formula, which is currently
# commented out further down in this script.
pr <- 0.5           # per reception
ptd <- 4            # per passing touchdown
pass_yd_pt <- 0.04  # per passing yard
int_point <- -1     # per interception
tp_conv <- 2        # per two-point conversion
fum_lost <- -2      # per fumble lost

# Season subset ----
# Keep only 2023 regular-season rows and the columns the summaries need, so
# the later steps work on a smaller frame.
df_filtered <- df %>%
  filter(season == 2023 & season_type == "REG") %>%
  select(all_of(c(
    "position", "player_name", "recent_team",
    "targets", "target_share", "receptions",
    "receiving_yards", "receiving_tds",
    "rushing_yards", "rushing_tds",
    "rushing_fumbles_lost", "receiving_fumbles_lost",
    "passing_yards", "passing_tds", "attempts", "completions",
    "interceptions", "fantasy_points", "fantasy_points_ppr",
    "sack_fumbles_lost",
    "passing_2pt_conversions", "rushing_2pt_conversions",
    "receiving_2pt_conversions", "carries"
  )))
# Player season totals ----

# Summarise one team's players into one row per player.
#
# team_code: a team abbreviation, compared against `recent_team`.
# Returns an ungrouped tibble holding the summed counting stats for every
# QB/RB/WR/TE/FB on that team, plus the derived rate columns.
summarise_team_players <- function(team_code) {
  df_filtered %>%
    filter(recent_team == team_code) %>%
    group_by(position, player_name, recent_team) %>%
    summarise(
      pos = first(position),
      # Number of input rows for the player (presumably one row per game
      # played; confirm against the layout of player_data.csv).
      g = n(),
      p_att = sum(attempts),
      cmp = sum(completions),
      p_yd = sum(passing_yards),
      p_td = sum(passing_tds),
      int = sum(interceptions),
      car = sum(carries),
      r_yd = sum(rushing_yards),
      r_td = sum(rushing_tds),
      tgt = sum(targets),
      rec = sum(receptions),
      rec_yd = sum(receiving_yards),
      rec_td = sum(receiving_tds),
      # Fumbles lost of every kind: rushing, receiving and on sacks.
      fmb = sum(
        rushing_fumbles_lost + receiving_fumbles_lost + sack_fumbles_lost
      ),
      # Two-point conversions of every kind: passing, rushing and receiving.
      tp_c = sum(
        passing_2pt_conversions + rushing_2pt_conversions +
          receiving_2pt_conversions
      ),
      f_ppr = sum(fantasy_points_ppr),
      # Drop the grouping so that sum(p_att) below spans the whole team
      # instead of a single player.
      .groups = "drop"
    ) %>%
    filter(pos %in% c("QB", "RB", "WR", "TE", "FB")) %>%
    mutate(
      # Share of this team's pass attempts that targeted the player. The
      # denominator comes from the rows being built right here, never from a
      # previously processed team.
      tgt_share = tgt / sum(p_att),
      ypc = r_yd / car,
      ypr = rec_yd / rec,
      cmp_pct = cmp / p_att,
      td_rate = p_td / p_att
    )
  # Custom scoring, currently disabled:
  # mutate(
  #   f_custom = case_when(
  #     pos == "QB" ~ (r_td * 6) + (.1 * r_yd) + (ptd * p_td)
  #       + (pass_yd_pt * p_yd) + (int_point * int) + (tp_conv * tp_c)
  #       + (fum_lost * fmb),
  #     pos %in% c("RB","WR","TE","FB") ~ (6 * r_td) + (.1 * r_yd) + (pr * rec)
  #       + (.1 * rec_yd) + (6 * rec_td) + (tp_conv * tp_c) + (fum_lost * fmb)))
}

# Teams are processed in `nfl_teams` order so the row order of the output
# file stays the same; the pieces are bound once instead of growing a data
# frame inside a loop.
combined_df <- nfl_teams %>%
  lapply(summarise_team_players) %>%
  bind_rows() %>%
  select(-position) %>%
  # The source data labels the Rams "LA"; the output uses "LAR".
  mutate(recent_team = ifelse(recent_team == "LA", "LAR", recent_team))

# Saving the dataframe
write.csv(combined_df, "players_2023.csv")
# Team season totals ----

# The team table is built from the player file written earlier in this script.
team_stats_df <- read.csv("players_2023.csv")

# The player file stores the Rams as "LAR" while `nfl_teams` lists "LA".
team_codes <- ifelse(nfl_teams == "LA", "LAR", nfl_teams)

# Add up the player rows of one team into a single-row tibble.
#
# team_code: team abbreviation as stored in the `recent_team` column.
# Returns a one-row tibble of team totals. A team with no player rows still
# yields a row (sums of 0 and a NaN completion rate).
summarise_team_totals <- function(team_code) {
  # A local name is used so the raw player data held in `df` is not
  # overwritten.
  team_players <- team_stats_df %>%
    filter(recent_team == team_code)

  tibble(
    team = team_code,
    off_yd = sum(team_players$p_yd) + sum(team_players$r_yd),
    p_yd = sum(team_players$p_yd),
    car = sum(team_players$car),
    r_yd = sum(team_players$r_yd),
    r_td = sum(team_players$r_td),
    p_ff = sum(team_players$f_ppr),
    p_att = sum(team_players$p_att),
    cmp_pct = sum(team_players$cmp) / sum(team_players$p_att),
    p_td = sum(team_players$p_td),
    int = sum(team_players$int),
    fmb = sum(team_players$fmb)
  )
}

# One row per team, in `nfl_teams` order, bound in a single step.
combined_df2 <- bind_rows(lapply(team_codes, summarise_team_totals))

write.csv(combined_df2, "team_stats_2023.csv")
message("generate_data.R complete")