-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add EWM method for 3PAT averaging #1
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ def run_model( | |
n_components:int=5, | ||
bootstrap_samples:int=100_000, | ||
n_simulated_games:int=200_000, | ||
fga_method:str="simple", | ||
min_samples:int=25, | ||
plot:bool=False, | ||
plot_args:dict={ | ||
|
@@ -41,9 +42,15 @@ def run_model( | |
:param n_components: The number of clusters to use for the GMM | ||
:param bootstrap_samples: The number of bootstrap samples to use (suggested 100,000 - 500,000) | ||
:param n_simulated_games: The number of simulated games to run (suggested 10,000 - 200,000) | ||
:param fga_method: The method to simulate field goals attempted ('simple' moving average or 'ewm') | ||
:param plot: Whether to plot the results | ||
:return: numpy array of simulated shot results (1 = made, 0 = missed), length = n_simulated_games | ||
""" | ||
|
||
fga_attempt_types = ['simple', 'ewm'] | ||
if fga_method not in fga_attempt_types: | ||
raise ValueError("Invalid fga_attempt_type. Expected one of: %s" % fga_attempt_types) | ||
|
||
try: | ||
player_df = get_player_shot_loc_data(player_name, context_measure_simple='FG3A') | ||
except Exception: | ||
|
@@ -60,7 +67,9 @@ def run_model( | |
threes['SHOT_MADE_FLAG'] = threes['SHOT_MADE_FLAG'].astype(np.int64) | ||
|
||
threes_train_xy = threes[['LOC_X', 'LOC_Y']].values.reshape(-1, 2) | ||
fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().values | ||
# fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().values | ||
fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().reset_index().sort_values('GAME_ID') | ||
|
||
|
||
#get league shot data | ||
|
||
|
@@ -114,16 +123,21 @@ def run_model( | |
def_adjustment = opponent_fg_percent_by_cluster / league_fg_percent_by_cluster | ||
|
||
#bootstrap resample from FGA data to find normal distribution fo estimated mean FGA per game | ||
fga_per_game_est = [np.random.choice(fga_per_game_data, size=len(fga_per_game_data), replace=True).mean() for _ in range(bootstrap_samples)] | ||
fga_per_game_est_mean = np.mean(fga_per_game_est) | ||
fga_per_game_est_std = np.std(fga_per_game_est) | ||
if fga_method == 'simple': | ||
fga_per_game_array = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].values | ||
fga_per_game_est = [np.random.choice(fga_per_game_array, size=len(fga_per_game_array), replace=True).mean() for _ in range(bootstrap_samples)] | ||
fga_per_game_est_mean = np.mean(fga_per_game_est) | ||
fga_per_game_est_std = np.std(fga_per_game_est) | ||
elif fga_method == 'ewm': | ||
fga_per_game_est_mean = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].ewm(span = len(fga_per_game_data)).mean().values[-1] | ||
fga_per_game_est_std = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].ewm(span = len(fga_per_game_data)).std().values[-1] | ||
|
||
fg3m_s = [] | ||
#simulate n_simulations games | ||
for _ in trange(n_simulated_games, desc=f'Simulating 3PM outcomes for {player_name} vs {opponent}...'): | ||
|
||
#simulate FGA | ||
fga_i = np.random.poisson(np.random.normal(fga_per_game_est_mean, fga_per_game_est_std)) | ||
fga_i = np.random.poisson(max(np.random.normal(fga_per_game_est_mean, fga_per_game_est_std),0)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So above you're not actually bootstrapping (for the EMA), so I don't think it's fair to draw from a normal distribution as input to the Poisson. In fact, I think this may be skewing the simulation results quite a bit, since the rolling std should be much larger than the std of the bootstrapped EMAs. I could be wrong on that second part, though. |
||
|
||
if fga_i == 0: | ||
fg3m_s.append(0) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No bootstrapping is being done here. We won't be able to compress it into a list comprehension, but I think we can write a for loop to do it:
e.g.
I've never actually done this before, and I'd like to examine the distribution here in a notebook or something. Since the EMA depends on the ordering, I'm not sure this is valid, but I think it does work (I mentioned this point earlier).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, having messed around with this in a notebook, I don't think it does work. Ordering does matter.