Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add EWM method for 3PAT averaging #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
parser = argparse.ArgumentParser(description='Run the model.')
parser.add_argument('-b', '--boostrap_samples', default=100_000, help='Set # of boostrap samples')
parser.add_argument('-n', '--n_simulated_games', default=10_000, help='Set # of simulated games')
parser.add_argument('-m', '--fga_method', default='simple', help='Set mean method to mwa')
#parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")

args = parser.parse_args()
bootstrap_samples = int(args.boostrap_samples)
n_simulated_games = int(args.n_simulated_games)
fga_method = str(args.fga_method)

#logging settings
root = logging.getLogger()
Expand All @@ -27,5 +31,6 @@

model.run_model(
bootstrap_samples=bootstrap_samples,
n_simulated_games=n_simulated_games
n_simulated_games=n_simulated_games,
fga_method=fga_method
)
5 changes: 4 additions & 1 deletion model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ def compress_lines(self):
def run_model(
self,
bootstrap_samples:int=100_000,
n_simulated_games:int=200_000
n_simulated_games:int=200_000,
fga_method:str="simple",

):

self.load_data()
Expand Down Expand Up @@ -81,6 +83,7 @@ def run_model(
defensive_matchup,
bootstrap_samples=bootstrap_samples,
n_simulated_games=n_simulated_games,
fga_method="simple",
plot=False
)
except Exception as e:
Expand Down
24 changes: 19 additions & 5 deletions model/models/threes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def run_model(
n_components:int=5,
bootstrap_samples:int=100_000,
n_simulated_games:int=200_000,
fga_method:str="simple",
min_samples:int=25,
plot:bool=False,
plot_args:dict={
Expand All @@ -41,9 +42,15 @@ def run_model(
:param n_components: The number of clusters to use for the GMM
:param bootstrap_samples: The number of bootstrap samples to use (suggested 100,000 - 500,000)
:param n_simulated_games: The number of simulated games to run (suggested 10,000 - 200,000)
:param fga_method: The method to simulate field goals attempted ('simple' moving average or 'ewm')
:param plot: Whether to plot the results
:return: numpy array of simulated shot results (1 = made, 0 = missed), length = n_simulated_games
"""

fga_attempt_types = ['simple', 'ewm']
if fga_method not in fga_attempt_types:
raise ValueError("Invalid fga_attempt_type. Expected one of: %s" % fga_attempt_types)

try:
player_df = get_player_shot_loc_data(player_name, context_measure_simple='FG3A')
except Exception:
Expand All @@ -60,7 +67,9 @@ def run_model(
threes['SHOT_MADE_FLAG'] = threes['SHOT_MADE_FLAG'].astype(np.int64)

threes_train_xy = threes[['LOC_X', 'LOC_Y']].values.reshape(-1, 2)
fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().values
# fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().values
fga_per_game_data = threes.groupby('GAME_ID')['SHOT_ATTEMPTED_FLAG'].count().reset_index().sort_values('GAME_ID')


#get league shot data

Expand Down Expand Up @@ -114,16 +123,21 @@ def run_model(
def_adjustment = opponent_fg_percent_by_cluster / league_fg_percent_by_cluster

#bootstrap resample from FGA data to find normal distribution of estimated mean FGA per game
fga_per_game_est = [np.random.choice(fga_per_game_data, size=len(fga_per_game_data), replace=True).mean() for _ in range(bootstrap_samples)]
fga_per_game_est_mean = np.mean(fga_per_game_est)
fga_per_game_est_std = np.std(fga_per_game_est)
if fga_method == 'simple':
fga_per_game_array = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].values
fga_per_game_est = [np.random.choice(fga_per_game_array, size=len(fga_per_game_array), replace=True).mean() for _ in range(bootstrap_samples)]
fga_per_game_est_mean = np.mean(fga_per_game_est)
fga_per_game_est_std = np.std(fga_per_game_est)
elif fga_method == 'ewm':
fga_per_game_est_mean = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].ewm(span = len(fga_per_game_data)).mean().values[-1]
fga_per_game_est_std = fga_per_game_data['SHOT_ATTEMPTED_FLAG'].ewm(span = len(fga_per_game_data)).std().values[-1]
Copy link
Owner

@bendominguez0111 bendominguez0111 Feb 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No bootstrapping is being done in the 'ewm' branch. It can't be compressed into a single list comprehension as cleanly as the simple mean, but I think we can write a for loop to do it,
e.g.

bootstrapped_emas = []
for _ in range(bootstrap_samples):
    fga_boot = np.random.choice(fga_per_game_array, size=len(fga_per_game_array), replace=True)
    fga_boot_ema = pd.Series(fga_boot).ewm(span=len(fga_boot)).mean().iloc[-1]
    bootstrapped_emas.append(fga_boot_ema)
fga_per_game_est_mean = np.mean(bootstrapped_emas)
fga_per_game_est_std = np.std(bootstrapped_emas)

I've never actually done this before, and I'd like to examine the resulting distribution in a notebook or something. Since an EMA depends on the ordering of the games and resampling with replacement shuffles that ordering, I'm not sure this is valid, but I think it does work (I mentioned this point earlier).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, having messed around with this in a notebook, I don't think it does work. Ordering does matter.


fg3m_s = []
#simulate n_simulations games
for _ in trange(n_simulated_games, desc=f'Simulating 3PM outcomes for {player_name} vs {opponent}...'):

#simulate FGA
fga_i = np.random.poisson(np.random.normal(fga_per_game_est_mean, fga_per_game_est_std))
fga_i = np.random.poisson(max(np.random.normal(fga_per_game_est_mean, fga_per_game_est_std),0))
Copy link
Owner

@bendominguez0111 bendominguez0111 Feb 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above, you're not actually bootstrapping in the 'ewm' case, so I don't think it's fair to draw from a normal distribution as the input to the Poisson. In fact, I think this may be skewing the simulation results quite a bit, since the exponentially weighted std of the per-game attempts should be much larger than the std of bootstrapped EMAs. I could be wrong on that second part, though.


if fga_i == 0:
fg3m_s.append(0)
Expand Down