Skip to content

Commit

Permalink
better selection of relevant enhancers
Browse files Browse the repository at this point in the history
  • Loading branch information
simonvh committed Dec 4, 2020
1 parent a97ab31 commit 0283d28
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions scepia/sc.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,14 +468,22 @@ def infer_motifs(
enhancer_df = enhancer_df.loc[var_enhancers, adata.uns["scepia"]["cell_types"]]
enhancer_df = enhancer_df.groupby(enhancer_df.columns, axis=1).mean()
enhancer_df.loc[:, :] = scale(enhancer_df)
# Select top most variable enhancers

main_cell_types = pd.concat(
(
adata.obs["cluster_annotation"].astype(str),
adata.obs["cell_annotation"].astype(str),
)
)
main_cell_types = [x for x in main_cell_types.unique() if x != "other"]

# Select top most variable enhancers of the most important annotated cell types
enhancer_df = enhancer_df.loc[
enhancer_df.var(1).sort_values().tail(num_enhancers).index
enhancer_df[main_cell_types].var(1).sort_values().tail(num_enhancers).index
]
# Center by mean of the most important cell types
# Here we chose the majority cell type per cluster
cluster_cell_types = adata.obs["cluster_annotation"].unique()
mean_value = enhancer_df[cluster_cell_types].mean(1)
mean_value = enhancer_df[main_cell_types].mean(1)
enhancer_df = enhancer_df.sub(mean_value, axis=0)
fname = NamedTemporaryFile(delete=False).name
enhancer_df.to_csv(fname, sep="\t")
Expand Down

0 comments on commit 0283d28

Please sign in to comment.