Tackle most FutureWarnings (#265)
* update patches branch (#263)

* WIP: patch pandas issues and upgrades (#251)

* patch value counts issue in merge lecture

* Update ci.yml

hopefully this works

* Update ci.yml

idk

* WIP: fix quandl deprecation

* Update timeseries.md

* Update environment.yml

upgrade pandas

* Update environment.yml

* Update groupby.md

* applymap -> map

* Update the_index.md

* Update timeseries.md

* fix matplotlib

* simplify btc time series data

* Update timeseries.md

* Update timeseries.md

this should fix time series

* one more

* TMP: disable build cache

* MAINT: maintenance of cloud infrastructure

* TST: upgrade anaconda and software stack

* update to python=3.12 in ci workflow

* simplify build and work through missing dependencies

* import some dependencies

* enable more dependencies

* Update networks.md

* oops

* fix deprecated map dataset, problem set issue

* need to install bokeh for maps lecture

* fix ml in econ?

* Update recidivism.md

* working with text

* Update working_with_text.md

* working with text

* Update working_with_text.md

* Update environment.yml

* Update working_with_text.md

* Update working_with_text.md

* try limiting api call delay

* try something else

* Update working_with_text.md

---------

Co-authored-by: Matt McKay <[email protected]>
Co-authored-by: mmcky <[email protected]>

* fix working with text caching

---------

Co-authored-by: Matt McKay <[email protected]>
Co-authored-by: mmcky <[email protected]>

* update patches from main (#264)


* Update matplotlib.md

* Update networks.md

* Update recidivism.md

* Update regression.md

* last deprecation fix

* last one for real

---------

Co-authored-by: Matt McKay <[email protected]>
Co-authored-by: mmcky <[email protected]>
3 people authored Nov 5, 2024
1 parent b80795b commit 7b10a64
Showing 4 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion lectures/applications/networks.md
@@ -540,7 +540,7 @@ def truncate(f): # define a function that "rounds" a number to 0 if it is lower
         return 1
 # we already know that every stock is perfectly correlated with itself, so the ones on the diagonal are not really useful information. Let's get rid of them.
-adj = corr.applymap(truncate) - np.identity(10)
+adj = corr.map(truncate) - np.identity(10)
 adj
 ```

14 changes: 7 additions & 7 deletions lectures/applications/recidivism.md
@@ -138,7 +138,7 @@ Let's look at how the dataset is broken down into age, sex, and race.
 ```{code-cell} python
 def create_groupcount_barplot(df, group_col, figsize, **kwargs):
     "call df.groupby(group_col), then count number of records and plot"
-    counts = df.groupby(group_col)["name"].count().sort_index()
+    counts = df.groupby(group_col, observed=True)["name"].count().sort_index()
 
     fig, ax = plt.subplots(figsize=figsize)
     counts.plot(kind="bar", **kwargs)
@@ -177,7 +177,7 @@ is mostly African-American or Caucasian.
 We now look into how recidivism is split across groups.
 
 ```{code-cell} python
-recid = df.groupby(["age_cat", "sex", "race"])["two_year_recid"].mean().unstack(level="race")
+recid = df.groupby(["age_cat", "sex", "race"], observed=True)["two_year_recid"].mean().unstack(level="race")
 recid
 ```

@@ -201,8 +201,8 @@ create_groupcount_barplot(df, "decile_score", (12, 8), color="DarkBlue", rot=0)
 How do these scores differ by race?
 
 ```{code-cell} python
-dfgb = df.groupby("race")
-race_count = df.groupby("race")["name"].count()
+dfgb = df.groupby("race", observed=True)
+race_count = df.groupby("race", observed=True)["name"].count()
 
 fig, ax = plt.subplots(3, figsize=(14, 8))
@@ -253,7 +253,7 @@ One of the key critiques from Pro Publica, though, was that the inaccuracies wer
 Let's now separate the correlations by race and see what happens.
 
 ```{code-cell} python
-recid_rates = df.pivot_table(index="decile_score", columns="race", values="two_year_recid")
+recid_rates = df.pivot_table(index="decile_score", columns="race", values="two_year_recid", observed=True)
 recid_rates
 ```
@@ -1021,7 +1021,7 @@ def balance_scorer(y_true, prob, df, weights):
             -weights[2]*(metrics.log_loss(y_true, prob, normalize=True)))
 
 score_params = {"df": df_train, "weights": [10.0, 1.0, 0.0]}
-scorer = metrics.make_scorer(balance_scorer, **score_params, needs_proba=True)
+scorer = metrics.make_scorer(balance_scorer, **score_params, response_method="predict_proba")
 grid_cv = model_selection.GridSearchCV(
     estimator=linear_model.LogisticRegression(penalty="l1",
                                               max_iter=100,
@@ -1090,7 +1090,7 @@ prediction and balance?
 
 ```{code-cell} python
 score_params = {"df": df_train, "weights": [10.0, 1.0, 5.0]}
-grid_cv.set_params(scoring=metrics.make_scorer(balance_scorer, **score_params, needs_proba=True))
+grid_cv.set_params(scoring=metrics.make_scorer(balance_scorer, **score_params, response_method="predict_proba"))
 bf_mod=grid_cv.fit(X_train,y_train)
 grid_cv_plot(bf_mod,"CV balance & fit")
2 changes: 1 addition & 1 deletion lectures/tools/matplotlib.md
@@ -318,7 +318,7 @@ def scale_by_middle(df):
 ```
 
 ```{code-cell} python
-to_plot = prices.groupby("Model").apply(scale_by_middle).T
+to_plot = prices.groupby("Model").apply(scale_by_middle, include_groups=False).T
 to_plot
 ```

4 changes: 2 additions & 2 deletions lectures/tools/regression.md
Expand Up @@ -783,7 +783,7 @@ This improves predictions and reduces the variance of the predictions.
from sklearn.ensemble import RandomForestRegressor
forest = RandomForestRegressor(n_estimators = 10).fit(Xsim,ysim)
fig=surface_scatter_plot(Xsim,ysim,lambda x: forest.predict([x]),
fig=surface_scatter_plot(Xsim,ysim,lambda x: forest.predict([x])[0],
show_f0=True)
fig
```
Expand Down Expand Up @@ -892,7 +892,7 @@ from sklearn import neural_network
nn = neural_network.MLPRegressor((6,), activation="logistic",
verbose=True, solver="lbfgs",
alpha=0.0).fit(Xsim,ysim)
fig=surface_scatter_plot(Xsim,ysim,lambda x: nn.predict([x]), show_f0=True)
fig=surface_scatter_plot(Xsim,ysim,lambda x: nn.predict([x])[0], show_f0=True)
fig
```
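The regression.md changes fix a shape issue rather than a deprecation: a scikit-learn regressor's `predict` returns an ndarray even for a single sample, and plotting code that expects a scalar z-value rejects a length-1 array, hence the added `[0]`. The stub below mimics that interface without requiring scikit-learn (`predict_stub` is an invented stand-in for `forest.predict` / `nn.predict`):

```python
import numpy as np

def predict_stub(X):
    # stand-in for an sklearn regressor's predict(): takes a 2-D array-like
    # of samples and returns a 1-D ndarray with one prediction per row
    return np.asarray(X).sum(axis=1)

x = [1.0, 2.0]
z_arr = predict_stub([x])        # length-1 ndarray, not a scalar
z_scalar = predict_stub([x])[0]  # indexing [0] extracts the plain scalar
```

The lambdas passed to `surface_scatter_plot` evaluate one point at a time, so they must unwrap the singleton array the same way.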

