diff --git a/lectures/applications/networks.md b/lectures/applications/networks.md
index fee601cf..7b7988b6 100644
--- a/lectures/applications/networks.md
+++ b/lectures/applications/networks.md
@@ -540,7 +540,7 @@ def truncate(f): # define a function that "rounds" a number to 0 if it is lower
         return 1
 
 # we already know that every stock is perfectly correlated with itself, so the ones on the diagonal are not really useful information. Let's get rid of them.
-adj = corr.applymap(truncate) - np.identity(10)
+adj = corr.map(truncate) - np.identity(10)
 adj
 ```
diff --git a/lectures/applications/recidivism.md b/lectures/applications/recidivism.md
index 6de93bc9..7ef51c8b 100644
--- a/lectures/applications/recidivism.md
+++ b/lectures/applications/recidivism.md
@@ -138,7 +138,7 @@ Let's look at how the dataset is broken down into age, sex, and race.
 ```{code-cell} python
 def create_groupcount_barplot(df, group_col, figsize, **kwargs):
     "call df.groupby(group_col), then count number of records and plot"
-    counts = df.groupby(group_col)["name"].count().sort_index()
+    counts = df.groupby(group_col,observed=True)["name"].count().sort_index()
 
     fig, ax = plt.subplots(figsize=figsize)
     counts.plot(kind="bar", **kwargs)
@@ -177,7 +177,7 @@ is mostly African-American or Caucasian.
 We now look into how recidivism is split across groups.
 
 ```{code-cell} python
-recid = df.groupby(["age_cat", "sex", "race"])["two_year_recid"].mean().unstack(level="race")
+recid = df.groupby(["age_cat", "sex", "race"], observed=True)["two_year_recid"].mean().unstack(level="race")
 recid
 ```
@@ -201,8 +201,8 @@ create_groupcount_barplot(df, "decile_score", (12, 8), color="DarkBlue", rot=0)
 How do these scores differ by race?
 
 ```{code-cell} python
-dfgb = df.groupby("race")
-race_count = df.groupby("race")["name"].count()
+dfgb = df.groupby("race", observed=True)
+race_count = df.groupby("race", observed=True)["name"].count()
 
 fig, ax = plt.subplots(3, figsize=(14, 8))
 
@@ -253,7 +253,7 @@ One of the key critiques from Pro Publica, though, was that the inaccuracies wer
 Let's now separate the correlations by race and see what happens.
 
 ```{code-cell} python
-recid_rates = df.pivot_table(index="decile_score", columns="race", values="two_year_recid")
+recid_rates = df.pivot_table(index="decile_score", columns="race", values="two_year_recid", observed=True)
 
 recid_rates
 ```
@@ -1021,7 +1021,7 @@ def balance_scorer(y_true, prob, df, weights):
            -weights[2]*(metrics.log_loss(y_true, prob, normalize=True)))
 
 score_params = {"df": df_train, "weights": [10.0, 1.0, 0.0]}
-scorer = metrics.make_scorer(balance_scorer, **score_params, needs_proba=True)
+scorer = metrics.make_scorer(balance_scorer, **score_params, response_method="predict_proba")
 grid_cv = model_selection.GridSearchCV(
     estimator=linear_model.LogisticRegression(penalty="l1",
                                               max_iter=100,
@@ -1090,7 +1090,7 @@ prediction and balance?
 
 ```{code-cell} python
 score_params = {"df": df_train, "weights": [10.0, 1.0, 5.0]}
-grid_cv.set_params(scoring=metrics.make_scorer(balance_scorer, **score_params, needs_proba=True))
+grid_cv.set_params(scoring=metrics.make_scorer(balance_scorer, **score_params, response_method="predict_proba"))
 bf_mod=grid_cv.fit(X_train,y_train)
 grid_cv_plot(bf_mod,"CV balance & fit")
diff --git a/lectures/tools/matplotlib.md b/lectures/tools/matplotlib.md
index c429c016..f16801c4 100644
--- a/lectures/tools/matplotlib.md
+++ b/lectures/tools/matplotlib.md
@@ -318,7 +318,7 @@ def scale_by_middle(df):
 ```
 
 ```{code-cell} python
-to_plot = prices.groupby("Model").apply(scale_by_middle).T
+to_plot = prices.groupby("Model").apply(scale_by_middle, include_groups=False).T
 to_plot
 ```
diff --git a/lectures/tools/regression.md b/lectures/tools/regression.md
index a005013f..f4cefc52 100644
--- a/lectures/tools/regression.md
+++ b/lectures/tools/regression.md
@@ -783,7 +783,7 @@ This improves predictions and reduces the variance of the predictions.
 from sklearn.ensemble import RandomForestRegressor
 forest = RandomForestRegressor(n_estimators = 10).fit(Xsim,ysim)
-fig=surface_scatter_plot(Xsim,ysim,lambda x: forest.predict([x]),
+fig=surface_scatter_plot(Xsim,ysim,lambda x: forest.predict([x])[0],
                          show_f0=True)
 fig
 ```
@@ -892,7 +892,7 @@ from sklearn import neural_network
 nn = neural_network.MLPRegressor((6,), activation="logistic",
                                  verbose=True, solver="lbfgs",
                                  alpha=0.0).fit(Xsim,ysim)
-fig=surface_scatter_plot(Xsim,ysim,lambda x: nn.predict([x]), show_f0=True)
+fig=surface_scatter_plot(Xsim,ysim,lambda x: nn.predict([x])[0], show_f0=True)
 fig
 ```
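
The hunks above all move to newer pandas and scikit-learn APIs: `DataFrame.map` in place of the deprecated `applymap`, explicit `observed=True` for categorical group-bys, `include_groups=False` in `groupby.apply`, and `response_method="predict_proba"` in place of `needs_proba=True` in `make_scorer`. Below is a minimal, stand-alone sketch of those replacement calls on toy data that is not from the lectures, assuming roughly pandas >= 2.2 and scikit-learn >= 1.4.

```python
# Hypothetical illustration only; toy data, not the lecture datasets.
import numpy as np
import pandas as pd
from sklearn import linear_model, metrics, model_selection

# pandas: DataFrame.map is the elementwise successor to the deprecated applymap.
corr_like = pd.DataFrame([[1.0, 0.2], [0.2, 1.0]])
adj_like = corr_like.map(lambda v: 1 if v >= 0.5 else 0) - np.identity(2)

# pandas: with categorical keys, observed=True keeps only categories that actually occur.
cats = pd.DataFrame({"grp": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"]),
                     "val": [1, 2, 3]})
means = cats.groupby("grp", observed=True)["val"].mean()  # no empty row for "c"

# pandas: include_groups=False stops groupby.apply from passing the key column to the function.
scaled = cats.groupby("grp", observed=True).apply(lambda g: g["val"] / g["val"].max(),
                                                  include_groups=False)

# scikit-learn: response_method="predict_proba" replaces needs_proba=True in make_scorer.
scorer = metrics.make_scorer(metrics.log_loss, greater_is_better=False,
                             response_method="predict_proba")
rng = np.random.default_rng(0)
X = rng.normal(size=(40, 3))
y = rng.integers(0, 2, 40)
cv_scores = model_selection.cross_val_score(linear_model.LogisticRegression(), X, y,
                                            scoring=scorer, cv=4)
```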