Skip to content

Commit

Permalink
fix path concatenations (#26)
Browse files Browse the repository at this point in the history
* fix path concatenations

* refactor

* update notebook

* update notebook

* update notebook

* update notebook
  • Loading branch information
safiyecelik authored Nov 21, 2023
1 parent 6083660 commit c7d84c4
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 22 deletions.
9 changes: 5 additions & 4 deletions efaar_benchmarking/data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def load_cpg16_crispr(data_path: str = "data/") -> tuple[pd.DataFrame, pd.DataFr
well_file_path = os.path.join(data_path, well_file_name)
crispr_file_path = os.path.join(data_path, crispr_file_name)
if not (os.path.exists(plate_file_path) and os.path.exists(well_file_path) and os.path.exists(crispr_file_path)):
path_to_zip_file = data_path + "tmp.zip"
path_to_zip_file = os.path.join(data_path, "tmp.zip")
wget.download(metadata_source_path, path_to_zip_file)
with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
for name in zip_ref.namelist():
Expand Down Expand Up @@ -118,9 +118,10 @@ def load_replogle(gene_type: str, data_type: str, data_path: str = "data/") -> s
else:
raise ValueError("gene_type must be either essential or genome_wide")

if not os.path.exists(data_path + filename):
wget.download(src, data_path + filename)
fn = os.path.join(data_path, filename)
if not os.path.exists(fn):
wget.download(src, fn)

adata = sc.read_h5ad(data_path + filename)
adata = sc.read_h5ad(fn)
adata = adata[:, np.all(~np.isnan(adata.X) & ~np.isinf(adata.X), axis=0)]
return adata
38 changes: 26 additions & 12 deletions efaar_benchmarking/efaar.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def embed_by_scvi_anndata(adata, batch_col=cst.REPLOGLE_BATCH_COL, n_latent=128,
Returns:
numpy.ndarray: Embedding of the input data using scVI.
"""
adata = adata.copy()
SCVI.setup_anndata(adata, batch_key=batch_col)
vae = SCVI(adata, n_hidden=n_hidden, n_latent=n_latent)
vae.train(use_gpu=True)
Expand All @@ -47,6 +48,30 @@ def embed_by_pca_anndata(adata, n_latent=100) -> np.ndarray:
return adata.obsm["X_pca"]


def centerscale(features: np.ndarray, metadata: pd.DataFrame = None, plate_col: Optional[str] = None) -> np.ndarray:
    """
    Center and scale the input features.

    When `plate_col` is None, a single `StandardScaler` is fit on all rows.
    Otherwise a separate `StandardScaler` is fit on the rows of each unique
    value found in `metadata[plate_col]`. The input array is never modified;
    a new array is always returned.

    Args:
        features (np.ndarray): Input features to be centered and scaled.
        metadata (pd.DataFrame): Metadata information for the input features.
            Required when `plate_col` is given; its rows are matched to the
            rows of `features` by position.
        plate_col (str): Name of the column in metadata that contains plate information.

    Returns:
        np.ndarray: Centered and scaled features.

    Raises:
        ValueError: If `plate_col` is given but `metadata` is None.
    """
    if plate_col is None:
        return StandardScaler().fit_transform(features)
    if metadata is None:
        raise ValueError("metadata must be provided if plate_col is not None")

    # Scale into a floating-point copy: writing the results back into the
    # caller's array would mutate their data and, for integer inputs,
    # silently truncate the standardized values on assignment.
    features = np.asarray(features)
    out_dtype = features.dtype if np.issubdtype(features.dtype, np.floating) else np.float64
    scaled = features.astype(out_dtype, copy=True)

    plate_labels = metadata[plate_col]
    for plate in plate_labels.unique():
        # Plain boolean array so rows are selected by position, not by index label.
        in_plate = (plate_labels == plate).to_numpy()
        scaled[in_plate, :] = StandardScaler().fit_transform(scaled[in_plate, :])
    return scaled


def embed_align_by_pca(
features: np.ndarray,
metadata: pd.DataFrame = None,
Expand All @@ -58,6 +83,7 @@ def embed_align_by_pca(
Note that we explicitly center & scale the data by plate before and after calling `PCA`.
Centering and scaling is done by plate if `plate_col` is not None, and on the whole data otherwise.
Note that `PCA` transformer also does mean-centering on the whole data prior to the PCA operation.
Args:
features (np.ndarray): Features to transform
metadata (pd.DataFrame): Metadata. Defaults to None.
Expand All @@ -69,18 +95,6 @@ def embed_align_by_pca(
np.ndarray: Transformed data using PCA.
"""

def centerscale(features, metadata, plate_col):
if plate_col is None:
features = StandardScaler().fit_transform(features)
else:
if metadata is None:
raise ValueError("metadata must be provided if plate_col is not None")
unq_plates = metadata[plate_col].unique()
for plate in unq_plates:
ind = metadata[plate_col] == plate
features[ind, :] = StandardScaler().fit_transform(features[ind, :])
return features

features = centerscale(features, metadata, plate_col)
features = PCA(variance_or_ncomp).fit_transform(features)
features = centerscale(features, metadata, plate_col)
Expand Down
23 changes: 17 additions & 6 deletions notebooks/replogle_map_building.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,19 @@
"cell_type": "code",
"execution_count": null,
"id": "3e9e8847",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"adata = load_replogle(\"genome_wide\", \"normalized\")\n",
"metadata = adata.obs\n",
"print(metadata.shape)\n",
"embeddings = embed_by_pca_anndata(adata)\n",
"print(embeddings.shape)\n",
"del adata\n",
"embeddings = align_on_controls(embeddings, metadata)\n",
"print(embeddings.shape)\n",
"map_data = aggregate(embeddings, metadata)\n",
"del embeddings, metadata\n",
"metrics = benchmark(map_data, recall_thr_pairs=recall_threshold_pairs)\n",
Expand All @@ -74,6 +79,7 @@
},
{
"cell_type": "markdown",
"id": "2bd66d1d",
"metadata": {},
"source": [
"## scVI Embeddings"
Expand All @@ -86,21 +92,26 @@
"metadata": {},
"outputs": [],
"source": [
"adata = load_replogle(\"essential\", \"raw\")\n",
"adata = load_replogle(\"genome_wide\", \"raw\")\n",
"metadata = adata.obs\n",
"print(metadata.shape)\n",
"embeddings = embed_by_scvi_anndata(adata)\n",
"print(embeddings.shape)\n",
"del adata\n",
"embeddings = align_on_controls(embeddings, metadata)\n",
"map_data = aggregate(embeddings, metadata)\n",
"del embeddings, metadata\n",
"metrics = benchmark(map_data, recall_thr_pairs=recall_threshold_pairs)\n",
"plot_recall(metrics)"
"plot_recall(metrics)\n",
"metrics.groupby('source')['recall_0.05_0.95'].mean()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "eben",
"language": "python",
"name": "python3"
"name": "eben"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -112,7 +123,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.10.4"
}
},
"nbformat": 4,
Expand Down

0 comments on commit c7d84c4

Please sign in to comment.