Skip to content

Commit

Permalink
Adds 32bit support part2
Browse files Browse the repository at this point in the history
  • Loading branch information
PTNobel committed Jan 1, 2025
1 parent a54f000 commit 11d59bc
Showing 1 changed file with 37 additions and 13 deletions.
50 changes: 37 additions & 13 deletions utils/sherlock_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import time
import os
import pickle
import pandas as pd
import pgenlib as pg

Expand All @@ -23,7 +24,9 @@
cache_dir = "/scratch/groups/candes/parth"
df = pd.read_csv(os.path.join(data_dir, "phenotypes.QC.britishonly.csv"), index_col=0)
df = df.drop('ethnicity', axis=1)
covars_dense = df[['age', 'sex'] + [f'PC{i}' for i in range(1, 11)]]
covars_dense = np.array(
df[['age', 'age_squared', 'sex'] + [f'PC{i}' for i in range(1, 11)]].to_numpy(),
dtype=np.float32)
y = np.array(df['height'].to_numpy(), dtype=np.float32)

chromosomes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Expand All @@ -35,7 +38,7 @@
ad.matrix.snp_unphased(
ad.io.snp_unphased(
os.path.join(cache_dir, f"EUR_subset_chr{chr}.snpdat"),
), n_threads=32, dtype=np.float32,
), n_threads=32, dtype=np.float32
)
for chr in chromosomes],
axis=1,
Expand All @@ -54,16 +57,37 @@
print(f'{X_train.shape=}')
print(f'{X_test.shape=}')

ti_solve = time.monotonic()
state = ad.grpnet(
X=X_train,
glm=ad.glm.gaussian(y_train),
early_exit=False,
min_ratio=1e-6,
n_threads=32,
)
tf_solve = time.monotonic()
model_cache = f'/scratch/groups/candes/parth/fit_model_{task_id}.pkl'

if os.path.exists(model_cache):
class fake_state:
def __init__(self):
with open(model_cache, 'rb') as fd:
d = pickle.load(fd)
self.betas = d['betas']
self.lmda_path = d['lmda_path']
self.intercepts = d['intercepts']
self.intercept = True
self.X = X_train
self.groups = np.arange(self.X.shape[1])
self.alpha = 1.0
state = fake_state()
ti_solve = 0
tf_solve = 0
else:
ti_solve = time.monotonic()
state = ad.grpnet(
X=X_train,
glm=ad.glm.gaussian(y_train, dtype=np.float32),
early_exit=False,
min_ratio=1e-6,
n_threads=32,
lmda_path_size=241,
)
tf_solve = time.monotonic()

with open(model_cache, 'wb') as fd:
pickle.dump({'betas': state.betas, 'lmda_path': state.lmda_path, 'intercepts': state.intercepts}, fd)

loss = torch.nn.MSELoss()
L = state.betas.shape[0]
Expand All @@ -76,7 +100,7 @@
ins[i] = loss(torch.from_numpy(y_hat_train[i]), torch.from_numpy(y_train))

ti_alo = time.monotonic()
ld, alo, ts, r2 = ai.get_alo_for_sweep(y_train, state, loss, 10)
ld, alo, ts, r2 = ai.get_alo_for_sweep_v2(y_train, state, loss, 80)
tf_alo = time.monotonic()

np.savez(sys.argv[1], lamda=ld, alo=alo, oos=oos, in_sample=ins, ts=ts, r2=r2, solve_time=tf_solve - ti_solve, alo_time=tf_alo - ti_alo)
#np.savez(sys.argv[1], alo_lamda=ld, full_lamda=state.lmda_path, alo=alo, oos=oos, in_sample=ins, ts=ts, r2=r2, solve_time=tf_solve - ti_solve, alo_time=tf_alo - ti_alo)

0 comments on commit 11d59bc

Please sign in to comment.