From 8245d7682083aa1334d7941b30ad2eaf3987aca3 Mon Sep 17 00:00:00 2001 From: Alfred Galichon Date: Sat, 23 Nov 2024 21:08:39 +0100 Subject: [PATCH] v__0.159 --- mec/data.py | 69 +++++++++++++++++++++-------------------------------- setup.py | 2 +- 2 files changed, 28 insertions(+), 43 deletions(-) diff --git a/mec/data.py b/mec/data.py index 75fa51a..668fc4a 100644 --- a/mec/data.py +++ b/mec/data.py @@ -108,6 +108,26 @@ def getcleandata(name,nrow): import numpy as np, pandas as pd + +def create_blp_instruments(X, mkts_firms_prods,include_ones = False): + if include_ones: + X = np.block([[np.ones((X.shape[0],1)), X ]] ) + df = pd.DataFrame() + names = [str(i) for i in range(X.shape[1])] + df[ names ]=X + df[['mkt','firm','prod']] = mkts_firms_prods + thelist1, thelist2 = [], [] + for _, theserie in df[ names ].items(): + thelist1.append ([theserie[(df['mkt']==df['mkt'][i]) & + (df['firm']==df['firm'][i]) & + (df['prod']!=df['prod'][i]) ].sum() for i,_ in df.iterrows() ]) + + thelist2.append([theserie[(df['mkt']==df['mkt'][i]) & + (df['firm']!=df['firm'][i]) ].sum() for i,_ in df.iterrows() ]) + + return np.array(thelist1+thelist2).T + + def load_blp_data(pyblp_compatibility=True): """ Returns the data used by Berry, Levinsohn and Pakes (2005) @@ -161,20 +181,6 @@ def load_blp_data(pyblp_compatibility=True): unobs = pd.read_csv(thepath+'../gentzkow-shapiro/unobs_pub.csv').dropna(axis=1, how='all') - # define a function to load instruments - def create_blp_instruments(df, theseries): - theseries = [pd.Series(len(df)*[1]) if s is None else s for s in theseries] - thelist= [] - for theserie in theseries: - thelist.append ([theserie[(df['market_ids']==df['market_ids'][i]) & - (df['firm_ids']==df['firm_ids'][i]) & - (df['car_ids']!=df['car_ids'][i]) ].sum() for i,_ in df.iterrows() ]) - - for theserie in theseries: - thelist.append([theserie[(df['market_ids']==df['market_ids'][i]) & - (df['firm_ids']!=df['firm_ids'][i]) ].sum() for i,_ in df.iterrows() ]) - - return np.array(thelist).T # convert 1990 displacement from liters to cubic inches prods.loc[prods['market_ids'] == 90, 'disp'] = prods.loc[prods['market_ids'] == 90, 'disp'] * 61.02 @@ -233,14 +239,14 @@ def create_blp_instruments(df, theseries): prods['space']= prods['lngth'] * prods['wdth'] prods['trend'] = prods['market_ids'] - prods['market_ids'][0] # - theseries= [None]+[prods[name] for name in ['hpwt', 'air', 'mpd']] - prods[['demand_instruments'+str(k) for k in range(8)] ] = create_blp_instruments(prods,theseries) - theseries = [None, np.log(prods['hpwt']), prods['air'], np.log(prods['mpg']),np.log(prods['space']) ] - prods[['supply_instruments'+str(k) for k in range(10)] ] = create_blp_instruments(prods,theseries) - theseries= [prods['trend']] - prods[['supply_instruments'+str(k) for k in [10,11] ] ] = create_blp_instruments(prods,theseries) + mkts_firms_prods = prods[['market_ids', 'firm_ids', 'car_ids']].to_numpy() + instr_vals = prods[['hpwt', 'air', 'mpd']].to_numpy() + prods[['demand_instruments'+str(k) for k in range(8)] ] = create_blp_instruments(instr_vals,mkts_firms_prods,include_ones=1) + instr_vals =pd.DataFrame([np.log(prods['hpwt']), prods['air'], np.log(prods['mpg']),np.log(prods['space']) ]).T.to_numpy() + prods[['supply_instruments'+str(k) for k in range(10)] ] = create_blp_instruments(instr_vals,mkts_firms_prods,include_ones=True) + instr_vals = prods['trend'].to_numpy().reshape((-1,1)) + prods[['supply_instruments'+str(k) for k in [10,11] ] ] = create_blp_instruments(instr_vals,mkts_firms_prods,include_ones=0) prods['supply_instruments11'] = prods['mpd'] - # now, prepare agent data mean_incomes_t = otherdf3['meanly'].to_numpy() sd_incomes = 1.72 @@ -281,24 +287,3 @@ def create_blp_instruments(df, theseries): print('Agent data matches pyblp.') return prods,agent_data - - -def create_blp_instruments(X, product_markets, product_firms, product_id): - df = pd.DataFrame() - namesX = ['X'+str(i) for i in range(X.shape[1])] - df[namesX]=X - df['market_ids'] = product_markets - df['firm_ids'] = product_firms - df['car_ids'] = product_id - theseries = [df[name] for name in namesX] - thelist=[] - for theserie in theseries: - thelist.append ([theserie[(df['market_ids']==df['market_ids'][i]) & - (df['firm_ids']==df['firm_ids'][i]) & - (df['car_ids']!=df['car_ids'][i]) ].sum() for i,_ in df.iterrows() ]) - - for theserie in theseries: - thelist.append([theserie[(df['market_ids']==df['market_ids'][i]) & - (df['firm_ids']!=df['firm_ids'][i]) ].sum() for i,_ in df.iterrows() ]) - - return np.array(thelist).T \ No newline at end of file diff --git a/setup.py b/setup.py index b367129..99b697e 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="mec", - version="0.158", + version="0.159", authors=["Alfred Galichon"], author_email="ag133@nyu.edu", licence="",