Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add mermaid output to show_graph #20631

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ memchr = { workspace = true }
once_cell = { workspace = true }
pyo3 = { workspace = true, optional = true }
rayon = { workspace = true }
regex = { workspace = true }
tokio = { workspace = true, optional = true }

[dev-dependencies]
Expand Down Expand Up @@ -226,6 +227,7 @@ dynamic_group_by = [
ewma = ["polars-plan/ewma"]
ewma_by = ["polars-plan/ewma_by"]
dot_diagram = ["polars-plan/dot_diagram"]
mermaid_diagram = ["polars-plan/mermaid_diagram"]
diagonal_concat = []
unique_counts = ["polars-plan/unique_counts"]
log = ["polars-plan/log"]
Expand Down Expand Up @@ -382,6 +384,7 @@ features = [
"list_to_struct",
"log",
"merge_sorted",
"mermaid_diagram",
"meta",
"mode",
"moment",
Expand Down
5 changes: 4 additions & 1 deletion crates/polars-lazy/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,16 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
extern crate core;

#[cfg(feature = "dot_diagram")]
#[cfg(any(feature = "dot_diagram", feature = "mermaid_diagram"))]
mod dot;
pub mod dsl;
pub mod frame;
pub mod physical_plan;
pub mod prelude;

#[cfg(feature = "mermaid_diagram")]
mod mermaid;

mod scan;
#[cfg(test)]
mod tests;
48 changes: 48 additions & 0 deletions crates/polars-lazy/src/mermaid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use polars_core::prelude::*;
use regex::Regex;

use crate::prelude::*;

impl LazyFrame {
    /// Render the query plan as a Mermaid flowchart definition.
    ///
    /// The plan is first rendered with `to_dot`; node and edge declarations
    /// are then pulled out of that text with regular expressions and
    /// re-emitted in Mermaid syntax under a `graph TD` header (all node
    /// lines first, then all edge lines).
    ///
    /// `optimized` is forwarded unchanged to `to_dot`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `to_dot`.
    pub fn to_mermaid(&self, optimized: bool) -> PolarsResult<String> {
        let dot_source = self.to_dot(optimized)?;

        // Both patterns are fixed literals, so a compile failure here would be
        // a programming error rather than a runtime condition.
        let node_pattern = Regex::new(r#"(?P<node>\w+)(\s+)?\[label="(?P<label>.*)"]"#).unwrap();
        let edge_pattern = Regex::new(r"(?P<node1>\w+) -- (?P<node2>\w+)").unwrap();

        // One `id["label"]` line per labelled node found in the dot text.
        let mut node_lines: Vec<String> = Vec::new();
        for caps in node_pattern.captures_iter(&dot_source) {
            let label = caps["label"]
                .replace(r"\n", "\n") // escaped newlines become real newlines
                .replace(r#"\""#, "#quot;"); // escaped quotes become the `#quot;` entity
            node_lines.push(format!("\t{}[\"{}\"]", &caps["node"], label));
        }

        // One `a --- b` line per `a -- b` pair found in the dot text.
        let mut edge_lines: Vec<String> = Vec::new();
        for caps in edge_pattern.captures_iter(&dot_source) {
            edge_lines.push(format!("\t{} --- {}", &caps["node1"], &caps["node2"]));
        }

        Ok(format!(
            "graph TD\n{}\n{}",
            node_lines.join("\n"),
            edge_lines.join("\n")
        ))
    }
}
2 changes: 2 additions & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ dynamic_group_by = ["polars-core/dynamic_group_by"]
ewma = ["polars-ops/ewma"]
ewma_by = ["polars-ops/ewma_by"]
dot_diagram = []
mermaid_diagram = []
unique_counts = ["polars-ops/unique_counts"]
log = ["polars-ops/log"]
chunked_ids = []
Expand Down Expand Up @@ -291,6 +292,7 @@ features = [
"concat_str",
"coalesce",
"dot_diagram",
"mermaid_diagram",
"trigonometry",
"streaming",
"true_div",
Expand Down
1 change: 1 addition & 0 deletions crates/polars-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ features = [
"array_arithmetic",
"array_to_struct",
"log",
"mermaid_diagram",
"mode",
"moment",
"ndarray",
Expand Down
5 changes: 5 additions & 0 deletions crates/polars-python/src/lazyframe/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,11 @@ impl PyLazyFrame {
Ok(result)
}

/// Return the mermaid rendering of the wrapped lazy frame's plan.
///
/// `optimized` is forwarded to `LazyFrame::to_mermaid`; a failure there is
/// wrapped in `PyPolarsErr` and converted into the Python error type.
fn to_mermaid(&self, optimized: bool) -> PyResult<String> {
    self.ldf
        .to_mermaid(optimized)
        .map_err(|err| PyPolarsErr::from(err).into())
}

fn optimization_toggle(
&self,
type_coercion: bool,
Expand Down
2 changes: 2 additions & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ array_arithmetic = ["polars-core/array_arithmetic", "dtype-array"]
array_to_struct = ["polars-ops/array_to_struct", "polars-lazy?/array_to_struct"]
log = ["polars-ops/log", "polars-lazy?/log"]
merge_sorted = ["polars-lazy?/merge_sorted"]
mermaid_diagram = ["polars-lazy?/mermaid_diagram"]
meta = ["polars-lazy?/meta"]
mode = ["polars-ops/mode", "polars-lazy?/mode"]
moment = ["polars-ops/moment", "polars-lazy?/moment"]
Expand Down Expand Up @@ -415,6 +416,7 @@ docs-selection = [
"diagonal_concat",
"abs",
"dot_diagram",
"mermaid_diagram",
"string_encoding",
"product",
"to_dummies",
Expand Down
1 change: 1 addition & 0 deletions crates/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@
//! * `lazy` - Lazy API
//! - `regex` - Use regexes in [column selection]
//! - `dot_diagram` - Create dot diagrams from lazy logical plans.
//! - `mermaid_diagram` - Create mermaid diagrams from lazy logical plans.
//! * `sql` - Pass SQL queries to Polars.
//! * `streaming` - Process datasets larger than RAM.
//! * `random` - Generate arrays with randomly sampled values
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ The opt-in features are:
- `lazy` - Lazy API:
- `regex` - Use regexes in column selection.
- `dot_diagram` - Create dot diagrams from lazy logical plans.
- `mermaid_diagram` - Create mermaid diagrams from lazy logical plans.
- `sql` - Pass SQL queries to Polars.
- `streaming` - Be able to process datasets that are larger than RAM.
- `random` - Generate arrays with randomly sampled values
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
TransferEncoding: TypeAlias = Literal["hex", "base64"]
WindowMappingStrategy: TypeAlias = Literal["group_to_rows", "join", "explode"]
ExplainFormat: TypeAlias = Literal["plain", "tree"]
ShowGraphFormat: TypeAlias = Literal["dot", "mermaid"]

# type signature for allowed frame init
FrameInitTypes: TypeAlias = Union[
Expand Down
52 changes: 52 additions & 0 deletions py-polars/polars/_utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,3 +702,55 @@ def display_dot_graph(
plt.imshow(img)
plt.show()
return None


def display_mermaid_graph(
*,
mermaid: str,
show: bool = True,
output_path: str | Path | None = None,
raw_output: bool = False,
) -> str | None:
if raw_output:
return mermaid

# Make font monospace
mermaid += r'%%{init: {"fontFamily": "monospace"}}%%' + "\n"

if output_path:
output_path = Path(output_path)
try:
subprocess.check_output(
[
"mmdc",
"--output",
output_path,
"--outputFormat",
output_path.suffix[1:],
"--input",
"-",
],
input=f"{mermaid}".encode(),
)

except (ImportError, FileNotFoundError):
msg = (
"The mermaid-cli `mmdc` binary should be on your PATH. "
"(If not installed you can download here: https://github.com/mermaid-js/mermaid-cli)"
)

raise ImportError(msg) from None
except subprocess.CalledProcessError as e:
msg = "Error while calling `mmdc` ensure that your file extension is one of `mmdc`'s supported output types."
raise ValueError(msg) from e

if not show:
return None

if not _in_notebook():
msg = "This function is only available in Jupyter notebooks."
raise OSError(msg)

from IPython.display import Markdown, display

return display(Markdown(f"```mermaid\n{mermaid}\n```"))
54 changes: 44 additions & 10 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from polars._utils.various import (
_is_generator,
display_dot_graph,
display_mermaid_graph,
extend_bool,
find_stacklevel,
is_bool_sequence,
Expand Down Expand Up @@ -118,6 +119,7 @@
SchemaDefinition,
SchemaDict,
SerializationFormat,
ShowGraphFormat,
StartBy,
UniqueKeepStrategy,
)
Expand Down Expand Up @@ -1130,6 +1132,7 @@ def show_graph(
show: bool = True,
output_path: str | Path | None = None,
raw_output: bool = False,
format: ShowGraphFormat | None = None,
figsize: tuple[float, float] = (16.0, 12.0),
type_coercion: bool = True,
_type_check: bool = True,
Expand All @@ -1147,8 +1150,11 @@ def show_graph(
"""
Show a plot of the query plan.

Note that Graphviz must be installed to render the visualization (if not
already present, you can download it here: `<https://graphviz.org/download>`_).
Note that Graphviz or mermaid must be installed to export the visualization
or show it outside of a notebook
(if not already present, you can download graphviz here: `<https://graphviz.org/download>`_
or mermaid here: `<https://github.com/mermaid-js/mermaid-cli>`_).


Parameters
----------
Expand All @@ -1160,6 +1166,8 @@ def show_graph(
Write the figure to disk.
raw_output
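Return the graph definition as a string (dot syntax by default, mermaid
syntax if `format="mermaid"`). This cannot be combined with `show` and/or
`output_path`.
format
The format of the graph output: `"dot"` or `"mermaid"`. If not set, dot is
used, falling back to mermaid if Graphviz is not available.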
figsize
Passed to matplotlib if `show == True`.
type_coercion
Expand Down Expand Up @@ -1214,14 +1222,40 @@ def show_graph(
new_streaming=False,
)

dot = _ldf.to_dot(optimized)
return display_dot_graph(
dot=dot,
show=show,
output_path=output_path,
raw_output=raw_output,
figsize=figsize,
)
if raw_output:
if format is None or format == "dot":
return _ldf.to_dot(optimized)
elif format == "mermaid":
return _ldf.to_mermaid(optimized)
else:
msg = f'format must be one of ["dot", "mermaid"] but got {format}'
raise ValueError(msg)

if format is None or format == "dot":
try:
dot = _ldf.to_dot(optimized)
return display_dot_graph(
dot=dot,
show=show,
output_path=output_path,
figsize=figsize,
)
except (ImportError, FileNotFoundError) as e:
missing_graphviz_error = e

# if the user specified "dot" raise immediately
if format == "dot":
raise missing_graphviz_error from None

# If we reach this point, we failed to display the graph using Graphviz
# we should try as mermaid instead
try:
mermaid = _ldf.to_mermaid(optimized)
return display_mermaid_graph(
mermaid=mermaid, show=show, output_path=output_path
)
except OSError:
raise missing_graphviz_error from None

def inspect(self, fmt: str = "{}") -> LazyFrame:
"""
Expand Down
Loading