diff --git a/scripts/benchmark/result.py b/scripts/benchmark/result.py index 293cf98..95a536b 100644 --- a/scripts/benchmark/result.py +++ b/scripts/benchmark/result.py @@ -99,8 +99,15 @@ def plot_ax(ax, data, xlabel): palette = ["#5847AD", "#F7B449"] kws = dict(orient="v", errorbar="sd", palette=palette, width=0.6) sns.barplot(data=data, ax=ax, **kws) - data.melt().pipe((sns.scatterplot, "data"), y="value", x="variable", ax=ax, - zorder=100, color="grey", alpha=0.5) + data.melt().pipe( + (sns.scatterplot, "data"), + y="value", + x="variable", + ax=ax, + zorder=100, + color="grey", + alpha=0.5, + ) ax.get_xticklabels()[0].set_fontweight("bold") ax.tick_params(axis="x", rotation=45) ax.tick_params(bottom=False) @@ -140,7 +147,9 @@ def plot_ax(ax, data, xlabel): fk.install("Lato", verbose=False) data = pd.DataFrame({"Marsilea": marsilea_tokens, "Matplotlib": matplotlib_tokens}) - _, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(6, 5), gridspec_kw={"wspace": 0.5}) + _, (ax1, ax2, ax3) = plt.subplots( + ncols=3, figsize=(6, 5), gridspec_kw={"wspace": 0.5} + ) # sns.set(style="whitegrid") plot_ax(ax1, data, "Tokens") # Plot the number of lines of code using seaborn @@ -156,4 +165,4 @@ def plot_ax(ax, data, xlabel): root = Path(__file__).parent plt.savefig(root / "marsilea_vs_matplotlib.png", dpi=300, bbox_inches="tight") plt.savefig(root / "marsilea_vs_matplotlib.svg", bbox_inches="tight") - plt.show() \ No newline at end of file + plt.show() diff --git a/scripts/example_figures/arc-diagrams.py b/scripts/example_figures/arc-diagrams.py new file mode 100644 index 0000000..78dfada --- /dev/null +++ b/scripts/example_figures/arc-diagrams.py @@ -0,0 +1,164 @@ +""" +Les Miserables Arc Diagram +=============================== + +This example shows how to create an arc diagram from a network. + +""" + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import networkx as nx + +import marsilea as ma +import marsilea.plotter as mp + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Lato", verbose=False) +# sphinx_gallery_end_ignore + +# %% +# Create Arc Diagram +# ------------------ + +data = pd.read_csv("data/PPI.csv") +G = nx.from_pandas_edgelist(data, "source", "target", create_using=nx.DiGraph) + +protein_classification = { + "MAP Kinases": [ + "MAP2K7", + "RPS6KA3", + "MAPK7", + "RPS6KB1", + "MAPKAPK5", + "RPS6KB2", + "JUN", + "RPS6KA1", + "MAPK10", + "MAPK1", + "MAPK14", + "MAPK3", + "MAP2K1", + "MAPK12", + "MAP2K4", + "MAPK9", + "MAPK8", + ], + "Kinases": [ + "CSNK2A1", + "GSK3B", + "CSNK1A1", + "PRKACA", + "SYK", + "JAK2", + "GSK3A", + "PRKCA", + "CDK1", + "ITK", + "ELK1", + "LYN", + "PRKCD", + "CDK4", + "PLK1", + "PAK1", + "CDK7", + "LCK", + "CDK5", + "MAPK8", + "GRK2", + "AURKB", + "PRKCQ", + "CDC25C", + "CHEK2", + "CDK7", + "TP53", + ], + "Transcription Factors": ["NFKBIA", "MEF2A", "ESR1", "CREB1", "NFATC4"], + "Tyrosine Kinases": [ + "HCLS1", + "FCGR2A", + "BTK", + "SYK", + "HCK", + "PTK2B", + "LCK", + "FYN", + "ZAP70", + ], + "Adaptor Proteins": ["IRS1", "CBL", "SHC1", "GRB2"], + "Ubiquitin Ligases": ["MDM2", "CBL"], + "Cell Structure/Signaling": [ + "STK11", + "FGFR1", + "CSK", + "ILK", + "CTTN", + "SNCA", + "KRT8", + ], + "Other": ["HNRNPK"], +} + + +colormap = { + "MAP Kinases": "#E2DFD0", + "Kinases": "#CA8787", + "Transcription Factors": "#E65C19", + "Tyrosine Kinases": "#F8D082", + "Adaptor Proteins": "#0A6847", + "Ubiquitin Ligases": "#7ABA78", + "Cell Structure/Signaling": "#03AED2", + "Other": ".7", +} + +# Reverse mapping to get labels for each protein +protein_labels = {} +for label, proteins in protein_classification.items(): + for protein in proteins: + protein_labels[protein] = label + +nodes = list(G.nodes) +nodes = pd.DataFrame( + {"nodes": nodes, "type": [protein_labels[n] for n in nodes]} +).sort_values("type")["nodes"] + +degs = nx.degree(G) +degree_arr = np.array([[degs[n] for n in nodes]]) +color_arr = np.array([[protein_labels[n] for n in nodes]]) + +edges = list(G.edges) +edges_colors = [colormap[protein_labels[a]] for a, _ in edges] + +sources = set([a for a, _ in edges]) +is_sources = np.array([["*" if n in sources else "" for n in nodes]]) + +wb = ma.SizedHeatmap( + degree_arr, + color_arr, + palette=colormap, + sizes=(10, 300), + frameon=False, + width=10.5, + height=0.3, + size_legend_kws={"func": lambda x: [int(i) for i in x], "title": "Count"}, + color_legend_kws={"title": "Protein Type"}, +) +wb.add_bottom(mp.Labels(nodes, align="bottom")) +wb.add_bottom(mp.Labels(is_sources, fontsize=16)) +arc = mp.Arc(nodes, edges, colors=edges_colors, lw=1, alpha=0.5) +wb.add_top(arc, size=2) +wb.add_legends(stack_size=1) +wb.render() + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + + save_path = Path(__file__).parent / "figures" + wb.save(save_path / "arc_diagram.svg") +else: + plt.show() +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/cooking_oils.py b/scripts/example_figures/cooking_oils.py new file mode 100644 index 0000000..e7827c6 --- /dev/null +++ b/scripts/example_figures/cooking_oils.py @@ -0,0 +1,122 @@ +""" +Fat content in cooking oils +=========================== + +This example shows how to apply x-layout on statistical plots. + +""" + +import marsilea as ma +import marsilea.plotter as mp + +import mpl_fontkit as fk + +fk.install_fontawesome(verbose=False) +fk.install("Lato", verbose=False) + +# sphinx_gallery_start_ignore +import matplotlib as mpl + +mpl.rcParams["font.size"] = 12 +# sphinx_gallery_end_ignore + + +# %% +# Load data +# --------- +oils = ma.load_data("cooking_oils") + +red = "#cd442a" +yellow = "#f0bd00" +green = "#7e9437" +gray = "#eee" + +mapper = {0: "\uf58a", 1: "\uf11a", 2: "\uf567"} +cmapper = {0: "#609966", 1: "#DC8449", 2: "#F16767"} +flavour = [mapper[i] for i in oils["flavour"].values] +flavour_colors = [cmapper[i] for i in oils["flavour"].values] +fat_content = oils[ + ["saturated", "polyunsaturated (omega 3 & 6)", "monounsaturated", "other fat"] +] + +# %% +# Visualize the oil contents +# -------------------------- + +fat_stack_bar = mp.StackBar( + fat_content.T * 100, + colors=[red, yellow, green, gray], + width=0.8, + orient="h", + label="Fat Content (%)", + legend_kws={"ncol": 2, "fontsize": 10}, +) +fmt = lambda x: f"{x:.1f}" if x > 0 else "" +trans_fat_bar = mp.Numbers( + oils["trans fat"] * 100, + fmt=fmt, + color="#3A98B9", + label="Trans Fat (%)", +) + +flavour_emoji = mp.Labels( + flavour, fontfamily="Font Awesome 6 Free", text_props={"color": flavour_colors} +) + +oil_names = mp.Labels(oils.index.str.capitalize()) + +fmt = lambda x: f"{int(x)}" if x > 0 else "" + +omege_bar = ma.plotter.CenterBar( + (oils[["omega 3", "omega 6"]] * 100).astype(int), + names=["Omega 3 (%)", "Omega 6 (%)"], + colors=["#7DB9B6", "#F5E9CF"], + fmt=fmt, + show_value=True, +) +conditions_text = [ + "Control", + ">230 °C\nDeep-frying", + "200-229 °C\nStir-frying", + "150-199 °C\nLight saute", + "<150 °C\nDressings", +] +colors = ["#e5e7eb", "#c2410c", "#fb923c", "#fca5a5", "#fecaca"] +conditions = ma.plotter.Chunk(conditions_text, colors, rotation=0, padding=10) + +cb = ma.ClusterBoard(fat_content.to_numpy(), height=10) +cb.add_layer(fat_stack_bar) +cb.add_left(trans_fat_bar, pad=0.2, name="trans fat") +cb.add_right(flavour_emoji) +cb.add_right(oil_names, pad=0.1) +cb.add_right(omege_bar, size=2, pad=0.2) + +order = [ + "Control", + ">230 °C (Deep-frying)", + "200-229 °C (Stir-frying)", + "150-199 °C (Light saute)", + "<150 °C (Dressings)", +] +cb.hsplit(labels=oils["cooking conditions"], order=order) +cb.add_left(conditions, pad=0.1) +cb.add_dendrogram( + "left", add_meta=False, colors=colors, linewidth=1.5, size=0.5, pad=0.02 +) +cb.add_title(top="Fat in Cooking Oils", fontsize=16) +cb.add_legends("bottom", pad=0.3) +cb.render() + +axes = cb.get_ax("trans fat") +for ax in axes: + ax.set_xlim(4.2, 0) + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + import matplotlib.pyplot as plt + + plt.rcParams["svg.fonttype"] = "none" + save_path = Path(__file__).parent / "imgs" + plt.savefig(save_path / "oil_well.svg", bbox_inches="tight") +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/mouse_map.py b/scripts/example_figures/mouse_map.py new file mode 100644 index 0000000..2e2799f --- /dev/null +++ b/scripts/example_figures/mouse_map.py @@ -0,0 +1,157 @@ +""" +Mouse Embryo Map +================ + +This example shows how to enhance a mouse embryo map. + +""" + +# %% +# Load dataset and prepare data +# ----------------------------- + +import numpy as np +from matplotlib.colors import LinearSegmentedColormap +from legendkit import cat_legend +import marsilea as ma +import matplotlib.pyplot as plt + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Lato", verbose=False) +# sphinx_gallery_end_ignore + +embryo = ma.load_data("mouse_embryo") +# Rotate x, y by 90 degree +embryo["cell_x"], embryo["cell_y"] = embryo["cell_y"], embryo["cell_x"] + + +xmax = embryo["cell_x"].max() +ymax = embryo["cell_y"].max() +xstart, xend = -xmax * 0.05, xmax * 1.05 +ystart, yend = -ymax * 0.05, ymax * 1.05 + +xrange = np.linspace(xstart, xend, 200) +yrange = np.linspace(ystart, yend, 200) + +xmid = (xrange[1:] + xrange[:-1]) / 2 +ymid = (yrange[1:] + yrange[:-1]) / 2 + + +def get_xy_hist(ct): + x = embryo[embryo["cell_type"] == ct]["cell_x"].to_numpy() + y = embryo[embryo["cell_type"] == ct]["cell_y"].to_numpy() + xhist, _ = np.histogram(x, bins=xrange) + yhist, _ = np.histogram(y, bins=yrange) + return xhist, yhist + + +# %% +# Here we have a predefined colormap for each cell type. + +colormap = { + "Cavity": "#6d32e6", + "Brain": "#bf024f", + "Meninges": "#d147a3", + "Choroid plexus": "#b3a726", + "Cartilage primordium": "#103a14", + "Jaw and tooth": "#ef833a", + "Connective tissue": "#b38b5c", + "Epidermis": "#35586d", + "Lung primordium": "#3cb44b", + "Sympathetic nerve": "#dfdce0", + "Liver": "#bd3add", + "Mucosal epithelium": "#0bd3b1", + "GI tract": "#ff4374", + "Mesentery": "#b74c11", + "Dorsal root ganglion": "#036df4", + "Muscle": "#dd7936", + "Mesothelium": "#5c5ca6", + "Blood vessel": "#be9b72", + "Urogenital ridge": "#d3245a", + "Heart": "#03fff4", + "Pancreas": "#f062f9", + "Kidney": "#62cfe8", + "Ovary": "#c923b1", +} + +width = 4 +height = width * (yend - ystart) / (xend - xstart) +b = ma.WhiteBoard(height=height, width=width) + +cell_types = ["Brain", "GI tract", "Liver", "Ovary", "Pancreas", "Heart", "Kidney"] +for n in cell_types: + b.add_canvas("bottom", size=0.15, pad=0.03, name=f"{n}-x") + b.add_canvas("left", size=0.15, pad=0.03, name=f"{n}-y") +b.render() + +# Draw cell +ax = b.get_main_ax() +points = ax.scatter(embryo["cell_x"], embryo["cell_y"], s=1, c=embryo["colors"]) +points.set_rasterized(True) +ax.set_xlim(xstart, xend) +ax.set_ylim(ystart, yend) +# ax.set_title("Mouse Embryo E12.5") +ax.set_axis_off() + +colors = list(colormap.values()) +labels = list(colormap.keys()) +cat_legend( + colors=colors, + labels=labels, + ax=ax, + loc="out upper left", + ncols=4, + deviation=0, + fontsize=8, +) + +for n in cell_types: + xh, yh = get_xy_hist(n) + cmap = LinearSegmentedColormap.from_list(n, ["white", colormap[n]]) + x_ax = b.get_ax(f"{n}-x") + x_ax.pcolormesh(xh.reshape(1, -1), cmap=cmap) + x_ax.set_axis_off() + x_ax.text(1.05, 0.5, n, va="center", ha="left", transform=x_ax.transAxes) + x_ax.add_patch( + plt.Rectangle( + (0, 0), + 1, + 1, + edgecolor=".5", + facecolor="none", + lw=1, + transform=x_ax.transAxes, + ) + ) + + y_ax = b.get_ax(f"{n}-y") + y_ax.pcolormesh(yh.reshape(-1, 1), cmap=cmap) + y_ax.set_axis_off() + y_ax.text( + 0.5, -0.05, n, va="top", ha="center", rotation=90, transform=y_ax.transAxes + ) + y_ax.add_patch( + plt.Rectangle( + (0, 0), + 1, + 1, + edgecolor=".5", + facecolor="none", + lw=1, + transform=y_ax.transAxes, + ) + ) + + +# sphinx_gallery_ignore_start +if "__file__" in globals(): + from pathlib import Path + + plt.rcParams["svg.fonttype"] = "none" + save_path = Path(__file__).parent / "figures" + plt.savefig(save_path / "mouse_embryo.svg", bbox_inches="tight", dpi=300) +else: + plt.show() +# sphinx_gallery_ignore_end diff --git a/scripts/example_figures/msa.py b/scripts/example_figures/msa.py new file mode 100644 index 0000000..36127d2 --- /dev/null +++ b/scripts/example_figures/msa.py @@ -0,0 +1,148 @@ +""" +Sequence Alignment Plot +======================= +""" +from collections import Counter +import numpy as np +import pandas as pd + +import marsilea as ma +import matplotlib as mpl +import matplotlib.pyplot as plt + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Roboto Mono", verbose=False) + +mpl.rcParams["font.size"] = 10 +# sphinx_gallery_end_ignore + +# %% +# Load data +# --------- +seq = ma.load_data("seq_align") +seq = seq.iloc[:, 135:155] + +species = [ + "PH4H_Homo_sapiens", + "PH4H_Mus_musculus", + # 'PH4H_Rattus_norvegicus', + # 'PH4H_Bos_taurus', + "PH4H_Chromobacterium_violaceum", + "PH4H_Ralstonia_solanacearum", + # 'PH4H_Caulobacter_crescentus', + "PH4H_Pseudomonas_aeruginosa", + "PH4H_Rhizobium_loti", +] +seq = seq.loc[species] + +# %% +# Calculate the height of each amino acid. +# See https://en.wikipedia.org/wiki/Sequence_logo + +collect = [] +for _, col in seq.items(): + collect.append(Counter(col)) + +hm = pd.DataFrame(collect) +del hm["-"] +hm = hm.T.fillna(0.0) +hm.columns = seq.columns +hm /= hm.sum(axis=0) + +n = hm.shape[1] +s = 20 +En = (1 / np.log(2)) * ((s - 1) / (2 * n)) + +heights = [] +for _, col in hm.items(): + H = -(np.log2(col) * col).sum() + R = np.log2(20) - (H + En) + heights.append(col * R) + +logo = pd.DataFrame(heights).T + +# %% +# Prepare color palette and data +# ------------------------------ + +color_encode = { + "A": "#f76ab4", + "C": "#ff7f00", + "D": "#e41a1c", + "E": "#e41a1c", + "F": "#84380b", + "G": "#f76ab4", + "H": "#3c58e5", + "I": "#12ab0d", + "K": "#3c58e5", + "L": "#12ab0d", + "M": "#12ab0d", + "N": "#972aa8", + "P": "#12ab0d", + "Q": "#972aa8", + "R": "#3c58e5", + "S": "#ff7f00", + "T": "#ff7f00", + "V": "#12ab0d", + "W": "#84380b", + "Y": "#84380b", + "-": "white", +} + +max_aa = [] +freq = [] + +for _, col in hm.items(): + ix = np.argmax(col) + max_aa.append(hm.index[ix]) + freq.append(col[ix]) + +position = [] +mock_ticks = [] +for i in seq.columns: + if int(i) % 10 == 0: + position.append(i) + mock_ticks.append("^") + else: + position.append("") + mock_ticks.append("") + +# %% +# Plot +# ---- + +height = 1.3 +width = height * seq.shape[1] / seq.shape[0] + +ch = ma.CatHeatmap(seq.to_numpy(), palette=color_encode, height=height, width=width) +seq_text = seq.to_numpy() +ch.add_layer(ma.plotter.TextMesh(seq_text, color="white")) +gap_text = seq_text.copy() +gap_text[np.where(gap_text != "-")] = " " +ch.add_layer(ma.plotter.TextMesh(gap_text, color="black")) +ch.add_top(ma.plotter.SeqLogo(logo, color_encode=color_encode), pad=0.05, size=0.5) +ch.add_right(ma.plotter.Labels(seq.index.str.slice(5)), pad=0.05) +ch.add_bottom(ma.plotter.Labels(mock_ticks, rotation=0)) +ch.add_bottom(ma.plotter.Labels(position, rotation=0)) +ch.add_bottom( + ma.plotter.Numbers(freq, width=0.9, color="#FFB11B", show_value=False), + name="freq_bar", + size=0.4, +) +ch.add_bottom(ma.plotter.Labels(max_aa, rotation=0), pad=0.05) +ch.render() + +ch.get_ax("freq_bar").set_axis_off() + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + + save_path = Path(__file__).parent / "figures" + mpl.rcParams["svg.fonttype"] = "none" + plt.savefig(save_path / "msa.svg", bbox_inches="tight") +else: + plt.show() +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/oncoprint.py b/scripts/example_figures/oncoprint.py new file mode 100644 index 0000000..0f07eea --- /dev/null +++ b/scripts/example_figures/oncoprint.py @@ -0,0 +1,143 @@ +""" +Breast cancer mutation with Oncoprinter +======================================= + +The Dataset is collected from cBioportal: +Breast Invasive Carcinoma (TCGA, PanCancer Atlas) + +""" +# sphinx_gallery_thumbnail_number = -1 +import matplotlib.pyplot as plt + +import marsilea as ma +import marsilea.plotter as mp +from oncoprinter import OncoPrint + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Lato", verbose=False) +# sphinx_gallery_end_ignore + +# %% +# Load data + + +onco_data = ma.load_data("oncoprint") +cna = onco_data["cna"] +mrna_exp = onco_data["mrna_exp"] +methyl_exp = onco_data["methyl_exp"] +clinical = onco_data["clinical"] + +# %% +# Make mRNA expression +# -------------------- + +mrna_exp = mrna_exp.loc[["TP53", "PTEN"]] +h = ma.Heatmap( + mrna_exp, + cmap="gist_heat_r", + height=0.3, + width=5, + cbar_kws=dict( + orientation="horizontal", height=1, width=10, title="mRNA Expression" + ), +) +h.add_title(top="mRNA expression Z-SCORE", align="left", fontsize=10) +h.add_left(mp.Labels(mrna_exp.index), pad=0.1) +h.render() + +# %% +# Make Methylation expression +# --------------------------- + + +m = ma.Heatmap( + methyl_exp.astype(float), + height=0.3, + width=5, + cmap="summer_r", + cbar_kws=dict(orientation="horizontal", height=1, width=10, title="Methylation"), +) +m.add_title(top="Methylation", align="left", fontsize=10) +m.add_left(mp.Labels(["PTEN", "CDH13"]), pad=0.1) +m.render() + +# %% +# Create Oncoprint +# ---------------- +selected_genes = ["TP53", "PIK3CA", "GATA3", "TTN", "CDH1", "PTEN"] +cna = cna[~cna["track"].isin(["KMT2C", "TTN", "FLG"])] +# cna['sample'] = cna['sample'].str.slice(5, 12) + +op = OncoPrint( + cna, + height=3, + name="main", + add_samples_names=False, + legend_kws=dict(fontsize=8), + add_tracks_counts_size=0.4, + add_mut_counts_size=0.4, + add_tracks_counts_pad=0.05, +) +op.render() + +# %% +# Make clinical information +# ------------------------- + + +clinical = clinical[op.samples_order] +short_term = { + "Breast Invasive Ductal Carcinoma": "IDC", + "Breast Invasive Lobular Carcinoma": "ILC", + "Breast Invasive Carcinoma (NOS)": "BIC (NOS)", +} +tumor_type = clinical.loc["Cancer Type Detailed"].map(short_term) +tumor_palette = ["#DD5746", "#4793AF", "#FFC470"] +tumor_colors = mp.Colors( + tumor_type, + palette=tumor_palette, + legend_kws=dict(fontsize=8), + label="Tumor Type", + label_loc="right", +) + +mut_count = clinical.loc["Mutation Count"] +mut_number = mp.Numbers( + mut_count, show_value=False, color="orange", label="Mutations", label_loc="right" +) + +# %% +# Add clinical to the oncoprint + +op.add_top(tumor_colors, size=0.15, pad=0.1) +op.add_top(mut_number, name="mut", size=0.15, pad=0.1, legend=False) +op.render() + +# %% +# Append expression to the oncoprint + +op /= h +op /= m + +# %% +# Render + +op.set_margin(0.2) +op.add_legends(box_padding=2, stack_size=4) +op.render() + +op.get_ax("main", "mut").set_axis_off() + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + import matplotlib.pyplot as plt + + plt.rcParams["svg.fonttype"] = "none" + save_path = Path(__file__).parent / "figures" + plt.savefig(save_path / "oncoprint.svg", bbox_inches="tight") +else: + plt.show() +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/single-heatmap.py b/scripts/example_figures/single-heatmap.py new file mode 100644 index 0000000..9adfa64 --- /dev/null +++ b/scripts/example_figures/single-heatmap.py @@ -0,0 +1,101 @@ +""" +Visualizing Single-cell RNA-seq Data +==================================== + +""" +# %% +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import Normalize +import marsilea as ma +import marsilea.plotter as mp + +from sklearn.preprocessing import normalize + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Lato", verbose=False) +# sphinx_gallery_end_ignore + +pbmc3k = ma.load_data("pbmc3k") +exp = pbmc3k["exp"] +pct_cells = pbmc3k["pct_cells"] +count = pbmc3k["count"] + +matrix = normalize(exp.to_numpy(), axis=0) + +cell_cat = [ + "Lymphoid", + "Myeloid", + "Lymphoid", + "Lymphoid", + "Lymphoid", + "Myeloid", + "Myeloid", + "Myeloid", +] +cell_names = [ + "CD4 T", + "CD14\nMonocytes", + "B", + "CD8 T", + "NK", + "FCGR3A\nMonocytes", + "Dendritic", + "Megakaryocytes", +] + +# Make plots +cells_proportion = mp.SizedMesh( + pct_cells, + size_norm=Normalize(vmin=0, vmax=100), + color="none", + edgecolor="#51829B", + linewidth=1.5, + sizes=(1, 600), + size_legend_kws=dict(title="% of cells", show_at=[0.3, 0.5, 0.8, 1]), +) +mark_high = mp.MarkerMesh(matrix > 0.7, color="#DB4D6D", label="High", size=70) +cell_count = mp.Numbers(count["Value"], color="#fac858", label="Cell Count") +cell_exp = mp.Violin( + exp, label="Expression", linewidth=0, color="#ee6666", density_norm="width" +) +cell_types = mp.Labels(cell_names, align="center") +gene_names = mp.Labels(exp.columns) + +# %% +# Group plots together +h = ma.Heatmap( + matrix, cmap="Greens", label="Normalized\nExpression", width=3.5, height=3.3 +) +h.add_layer(cells_proportion) +h.add_layer(mark_high) +h.add_right(cell_count, pad=0.1, size=0.7) +h.add_bottom(cell_exp, pad=0.1, size=0.75, name="exp") +h.add_left(cell_types) +h.add_top(gene_names, pad=0.05) + +h.hsplit(labels=cell_cat, order=["Lymphoid", "Myeloid"]) +h.add_left(mp.Chunk(["Lymphoid", "Myeloid"], ["#33A6B8", "#B481BB"]), pad=0.05) +h.add_dendrogram("left", colors=["#33A6B8", "#B481BB"]) +h.add_dendrogram("top") +h.add_legends("right", align_stacks="left", align_legends="left", pad=0.2) +h.set_margin(0.2) +h.render() + +# h.get_ax("exp").set_yscale("symlog") + + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + import matplotlib.pyplot as plt + + save_path = Path(__file__).parent / "figures" + mpl.rcParams["svg.fonttype"] = "none" + # mpl.rcParams["font.family"] = "Arial" + plt.savefig(save_path / "single-heatmap.svg", bbox_inches="tight") +else: + plt.show() +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/tracks.py b/scripts/example_figures/tracks.py new file mode 100644 index 0000000..57cb2ed --- /dev/null +++ b/scripts/example_figures/tracks.py @@ -0,0 +1,141 @@ +# %% +from pathlib import Path + +import matplotlib.pyplot as plt +import mpl_fontkit as fk +import numpy as np +import pandas as pd +import pyBigWig + +fk.install("Lato") + +import marsilea as ma +import marsilea.plotter as mp + +# Please downlaod data from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE137105 +bws = [i for i in Path("data/GSE137105_RAW/").glob("*.bw")] + +MYC_START = 127734550 +MYC_END = 127744631 + +pdata = [] +for bw in bws: + names = bw.stem.split("_") + bw = pyBigWig.open(str(bw)) + # Gene location of MYC + c = bw.intervals("chr8", MYC_START, MYC_END) + vs = np.array([i[2] for i in c]) + pdata.append({"cond": names[1], "enz": names[2], "track": vs}) + +pdata = pd.DataFrame(pdata) +pdata["enz"] = pdata["enz"].replace("INPUT", "Background") +pdata = pdata.sort_values(["enz", "cond"]).reset_index(drop=True) + + +# %% +def add_gene_structure(ax): + import re + from matplotlib.lines import Line2D + + myc_structure = pd.read_csv("data/MYC.GFF3", sep="\t", comment="#", header=None) + myc_structure = myc_structure[myc_structure[2] == "exon"] + + def extract_id(text): + return re.search(r"ID=exon-(.*?)-", text).group(1) + + myc_structure["id"] = myc_structure[8].apply(extract_id) + + # Skeleton + # _, ax = plt.subplots(figsize=(5, 1)) + ranges = myc_structure[[3, 4]] + rmin, rmax = np.min(ranges), np.max(ranges) + ske = Line2D([rmin, rmax], [0, 0], linewidth=1, color="k") + ax.add_artist(ske) + + colors = ["#AF8260", "#E4C59E"] + ix = 0 + for _, df in myc_structure.groupby("id"): + for _, row in df.iterrows(): + start, end = row[3], row[4] + e = Line2D([start, end], [0, 0], linewidth=10, color=colors[ix]) + ax.add_artist(e) + ix += 1 + + ax.set_xlim(MYC_START, MYC_END) + ax.set_ylim(-1, 1) + ax.set_axis_off() + + +# %% +colors = { + "H3K9me1": "#DD6E42", + "H3K9me2": "#E8DAB2", + "H3K9me3": "#4F6D7A", + "Background": "#C0D6DF", +} + +lims = { + "H3K9me1": 20, + "H3K9me2": 35, + "H3K9me3": 35, + "Background": 20, +} + +TRACK_HEIGHT = 0.5 +TRACK_PAD = 0.1 +myc_track = ma.ZeroHeight(4.5, name="myc") + +for _, row in pdata.iterrows(): + track = row["track"] + name = f"{row['cond']}{row['enz']}" + color = colors[row["enz"]] + myc_track.add_bottom( + mp.Area(track, color=color, add_outline=False, alpha=1), + size=TRACK_HEIGHT, + pad=TRACK_PAD, + name=name, + ) + +myc_track.add_canvas("bottom", name="gene", size=0.2, pad=0.1) +myc_track.add_title(bottom="MYC") + +cond_canvas = ma.ZeroHeight(0.8) +for cond in pdata["cond"]: + cond_canvas.add_bottom( + mp.Title(cond, align="left"), size=TRACK_HEIGHT, pad=TRACK_PAD + ) + +enz_canvas = ma.ZeroHeight(1, name="enz") +for enz in pdata["enz"].drop_duplicates(): + enz_canvas.add_bottom( + mp.Title(f"‎ ‎ {enz}", align="left"), + size=TRACK_HEIGHT * 2, + pad=TRACK_PAD * 2, + name=enz, + ) + +comp = myc_track + 0.1 + cond_canvas + enz_canvas +comp.render() + +# Add a line for enz +for enz in pdata["enz"].drop_duplicates(): + ax = comp.get_ax("enz", enz) + ax.axvline(x=0, color="k", lw=4) + +for _, row in pdata.iterrows(): + name = f"{row['cond']}{row['enz']}" + lim = lims[row["enz"]] + ax = comp.get_ax("myc", name) + ax.set_ylim(0, lim) + ax.set_yticks([lim]) + +# Add gene structure +ax = comp.get_ax("myc", "gene") +add_gene_structure(ax) + +if "__file__" in globals(): + save_path = Path(__file__).parent / "figures" + plt.rcParams["svg.fonttype"] = "none" + plt.savefig(save_path / "tracks.svg", bbox_inches="tight", facecolor="none") +else: + plt.show() diff --git a/scripts/example_figures/two-heatmap.py b/scripts/example_figures/two-heatmap.py new file mode 100644 index 0000000..eb4bf63 --- /dev/null +++ b/scripts/example_figures/two-heatmap.py @@ -0,0 +1,141 @@ +""" +Visualizing Single-cell Multi-Omics Data +======================================== + +""" + +# %% +import matplotlib as mpl +from matplotlib.ticker import FuncFormatter +import matplotlib.pyplot as plt + +# sphinx_gallery_start_ignore +import mpl_fontkit as fk + +fk.install("Lato", verbose=False) +# sphinx_gallery_end_ignore + +import marsilea as ma + +dataset = ma.load_data("sc_multiomics") + +fmt = FuncFormatter(lambda x, _: f"{x:.0%}") +lineage = ["B Cells", "T Cells", "Mono/DC", "Plasma"] +lineage_colors = ["#D83F31", "#EE9322", "#E9B824", "#219C90"] +m = dict(zip(lineage, lineage_colors)) +cluster_data = dataset["gene_exp_matrix"] +interaction = dataset["interaction"] +lineage_cells = dataset["lineage_cells"] +marker_names = dataset["marker_names"] +cells_count = dataset["cells_count"] +display_cells = dataset["display_cells"] + +# sphinx_gallery_start_ignore +lineage_short_mapper = { + "B Lymphocytes": "B Cells", + "T Lymphocytes": "T Cells", + "Monocytes/DCs": "Mono/DC", + "Plasma": "Plasma", +} +lineage_cells = [lineage_short_mapper[c] for c in lineage_cells] +# sphinx_gallery_end_ignore + +# %% +gene_profile = ma.SizedHeatmap( + dataset["gene_pct_matrix"], + color=dataset["gene_exp_matrix"], + height=3.3, + width=3.5, + cluster_data=cluster_data, + marker="x", + cmap="PuRd", + sizes=(1, 30), + color_legend_kws={"title": "RNA", "height": 8, "width": 1.5}, + size_legend_kws={ + "colors": "#e252a4", + "fmt": fmt, + "title": "% positive", + "labelspacing": 0.5, + }, +) +gene_profile.hsplit(labels=lineage_cells, order=lineage) +gene_profile.add_left(ma.plotter.Chunk(lineage, lineage_colors, padding=4)) +gene_profile.add_dendrogram( + "left", method="ward", colors=lineage_colors, meta_color=".5", linewidth=1.5 +) +gene_profile.add_bottom( + ma.plotter.Labels(marker_names, color="#392467", align="bottom", padding=4) +) +gene_profile.vsplit([13]) +gene_profile.add_bottom( + ma.plotter.Chunk( + ["Cluster of Differentiation", "Other Immune"], + ["#537188", "#CBB279"], + padding=4, + ) +) +gene_profile.add_right( + ma.plotter.Labels( + display_cells, + text_props={"color": [m[c] for c in lineage_cells]}, + align="center", + padding=10, + ) +) +gene_profile.add_title("Transcriptomics Profile") + +protein_profile = ma.SizedHeatmap( + dataset["protein_pct_matrix"], + color=dataset["protein_exp_matrix"], + sizes=(1, 30), + cluster_data=cluster_data, + marker="+", + cmap="YlOrBr", + height=3.3, + width=3.5, + color_legend_kws={"title": "Protein", "height": 8, "width": 1.5}, + size_legend_kws={ + "colors": "#de600c", + "fmt": fmt, + "title": "% positive", + "labelspacing": 0.5, + }, +) +protein_profile.hsplit(labels=lineage_cells, order=lineage) +protein_profile.add_bottom( + ma.plotter.Labels(marker_names, color="#E36414", align="bottom", padding=4) +) +protein_profile.add_dendrogram("left", method="ward", show=False) +score = interaction["STRING Score"] +protein_profile.add_bottom( + ma.plotter.Arc( + marker_names, + interaction[["N1", "N2"]].values, + # weights=score, + colors=interaction["Type"].map({"Homo": "#864AF9", "Hetero": "#FF9BD2"}), + labels=interaction["Type"], + width=1, + legend_kws={"title": "PPI Type"}, + ), + size=1, +) +protein_profile.add_right( + ma.plotter.Numbers(cells_count, color="#B80000"), size=0.7, pad=0.1 +) +protein_profile.add_title("Proteomics Profile") + +comb = gene_profile + protein_profile +comb.add_legends("top", stack_size=1, stack_by="column", align_stacks="top") +comb.render() + +# sphinx_gallery_start_ignore +if "__file__" in globals(): + from pathlib import Path + + save_path = Path(__file__).parent / "figures" + mpl.rcParams["svg.fonttype"] = "none" + # mpl.rcParams["font.family"] = "Arial" + plt.savefig(save_path / "two-heatmap.svg", bbox_inches="tight") +else: + plt.show() +# sphinx_gallery_end_ignore diff --git a/scripts/example_figures/upset.py b/scripts/example_figures/upset.py new file mode 100644 index 0000000..7580f4c --- /dev/null +++ b/scripts/example_figures/upset.py @@ -0,0 +1,56 @@ +""" +Upset plot of overlap genes in KEGG cancer pathway +""" +# %% +from pathlib import Path +import matplotlib.pyplot as plt +import mpl_fontkit as fk + +fk.install("Lato") +plt.rcParams["svg.fonttype"] = "none" + +import httpx +from marsilea.upset import UpsetData, Upset + +# KEGG hsa_id and the name +paths = { + "05210": "Colorectal cancer", + "05212": "Pancreatic cancer", + "05226": "Gastric cancer", + "05219": "Bladder cancer", + "05215": "Prostate cancer", + "05224": "Breast cancer", + # '05222': 'Small cell lung cancer', + # '05223': 'Non-small cell lung cancer' +} + +pathway_genes = {} + +for hsa_id, name in paths.items(): + r = httpx.get(f"https://rest.kegg.jp/link/hsa/hsa{hsa_id}") + genes = [i.split("\t")[1] for i in r.text.strip().split("\n")] + pathway_genes[paths[hsa_id]] = genes + +# %% +upset_data = UpsetData.from_sets( + pathway_genes, sets_names=[paths[k] for k in paths.keys()] +) +us = Upset( + upset_data, + radius=35, + linewidth=1, + min_cardinality=4, + size_scale=0.23, + add_intersections=False, + add_sets_size=False, +) +us.add_intersections("top", size=0.4, pad=0.1) +us.add_sets_size("left", size=0.4, pad=0.1) +us.highlight_subsets("Breast cancer", facecolor="#319DA0") +# us.save("scripts/example_figures/figures/upset.svg") + +if "__file__" in globals(): + us.save(Path(__file__).parent / "figures" / "upset.svg") +else: + us.render() + plt.show() diff --git a/scripts/example_figures/violin_swarm.py b/scripts/example_figures/violin_swarm.py new file mode 100644 index 0000000..00a4c06 --- /dev/null +++ b/scripts/example_figures/violin_swarm.py @@ -0,0 +1,71 @@ +# %% +import matplotlib.pyplot as plt +import seaborn as sns + +data = sns.load_dataset("penguins").dropna() +data = data.pivot(columns=["species", "island", "sex"], values="bill_length_mm") + +# %% + +penguin_colors = { + "Chinstrap": "#B462C4", + "Adelie": "#EE7A30", + "Gentoo": "#347074", +} + +island_colors = { + "Torgersen": "#0079FF", + "Biscoe": "#00DFA2", + "Dream": "#F6FA70", +} + +sex_colors = { + "Female": "#EF6262", + "Male": "#1D5B79", +} + +import marsilea as ma +import marsilea.plotter as mp + +species = data.columns.get_level_values(0) +uni_species = list(species.unique()) +species_colors = [penguin_colors[s] for s in uni_species] + +islands = data.columns.get_level_values(1) +sex = data.columns.get_level_values(2) + +wb = ma.ClusterBoard(data.fillna(0).to_numpy(), margin=0.2, height=3) +wb.add_layer( + mp.Violin( + data, linewidth=1, density_norm="width", group_kws=dict(color=species_colors) + ) +) +wb.add_layer(mp.Swarm(data, color="k", size=2)) +wb.add_title("Penguin Bill Lengths (mm)") + +wb.group_cols(species, order=uni_species) +wb.add_top(mp.Chunk(uni_species, species_colors)) +wb.add_bottom( + mp.Colors(islands, palette=island_colors, label="Island"), size=0.2, pad=0.1 +) +wb.add_bottom(mp.Colors(sex, palette=sex_colors, label="Sex"), size=0.2, pad=0.1) +wb.add_dendrogram( + "top", + method="ward", + colors=species_colors, + meta_ratio=0.25, + meta_color="#005874", + linewidth=1.5, +) +wb.add_legends() +wb.render() + + +if "__file__" in globals(): + from pathlib import Path + + plt.rcParams["svg.fonttype"] = "none" + save_path = Path(__file__).parent / "figures" + wb.save(save_path / "penguin_violin_swarm.svg") +else: + plt.show()