Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for pandas dataframes, multiindex formatting #1046

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 9 additions & 18 deletions docs/Maths.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Result:

Create a table with pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html):
```python
from fpdf import FPDF
from fpdf.adapters.table_pandas import FPDF_pandas
import pandas as pd

df = pd.DataFrame(
Expand All @@ -121,25 +121,16 @@ df = pd.DataFrame(
}
)

df = df.applymap(str) # Convert all data inside dataframe into string type

columns = [list(df)] # Get list of dataframe columns
rows = df.values.tolist() # Get list of dataframe rows
data = columns + rows # Combine columns and rows in one list

pdf = FPDF()
pdf = FPDF_pandas()
pdf.add_page()
pdf.set_font("Times", size=10)
with pdf.table(borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160) as table:
for data_row in data:
row = table.row()
for datum in data_row:
row.cell(datum)
pdf.dataframe(df,
borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160)
pdf.output("table_from_pandas.pdf")
```

Expand Down
2 changes: 2 additions & 0 deletions docs/Tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ Result:

![](table_with_multiple_headings.png)

This also works with index columns. Pass any integer to the `num_index_columns` argument when calling `Table()` and that many columns will be formatted according to the `index_style` argument.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this only applies to the pandas adapter, and should probably be removed.

However, it would be nice to add a section about the pandas adapter to this file, as I'm sure many fpdf2 users would be happy to find out about it while reading this page 🙂


## Table from pandas DataFrame

_cf._ [Maths documentation page](Maths.md#using-pandas)
Expand Down
38 changes: 38 additions & 0 deletions fpdf/adapters/table_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pandas import MultiIndex
from fpdf import FPDF


class FPDF_pandas(FPDF):
Copy link
Member

@Lucas-C Lucas-C Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this adapter only adds a single method, I think we should provide a mixin instead, so that fpdf2 users can combine several mixins if they want to!

class PandasMixin:
    def dataframe(self, df, **kwargs):
        ...

And that would be how end-users make use of it:

from fpdf import FPDF
from fpdf.pandas import PandasMixin

class MyPDF(FPDF, PandasMixin):
    pass

pdf = MyPDF()
pdf.add_page()
pdf.set_font("Times", size=10)
pdf.dataframe(df, ...)

What do you think of this approach @afriedman412 🙂?

def __init__(self, **kwargs):
super().__init__(**kwargs)

def dataframe(self, df, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some docstring would be nice before merging this PR 🙂 (as well as an addition in CHANGELOG.md)

with self.table(
num_index_columns=df.index.nlevels,
num_heading_rows=df.columns.nlevels,
**kwargs
) as table:
TABLE_DATA = format_df(df)
for data_row in TABLE_DATA:
row = table.row()
for datum in data_row:
row.cell(datum)


def format_df(df, char: str = " ", convert_to_string: bool = True) -> list:
    """Flatten a DataFrame into a list of rows suitable for a table.

    The result contains one row per column level (the heading rows) followed
    by one row per DataFrame row. Every row starts with ``df.index.nlevels``
    cells: filler cells in the heading rows, index labels in the data rows.

    Args:
        df: the pandas DataFrame to convert.
        char: text placed in the top-left corner cells, i.e. the cells that sit
            above the index columns and beside the column headings. It is used
            as a whole, one copy per corner cell.
        convert_to_string: when True (default), every cell of the result is
            passed through ``str``. Data cells are always converted to strings,
            so this flag only affects heading labels, index labels and filler.

    Returns:
        A list of lists; each inner list is one table row.
    """
    # DataFrame.map only exists in pandas >= 2.1; older releases expose the
    # same element-wise operation as applymap. Look the method up on the type
    # so that a column that happens to be named "map" cannot shadow it.
    stringify = df.map if hasattr(type(df), "map") else df.applymap
    data = stringify(str).values.tolist()

    if isinstance(df.columns, MultiIndex):
        # Transpose the label tuples: one heading row per column level.
        heading = [list(level) for level in zip(*df.columns)]
    else:
        # Index.tolist() keeps pandas scalars (e.g. Timestamp), whereas
        # .values.tolist() turns datetime-like labels into raw integers.
        heading = [df.columns.tolist()]

    if isinstance(df.index, MultiIndex):
        index = [list(labels) for labels in df.index]
    else:
        index = [[label] for label in df.index.tolist()]

    # One independent filler row per heading row. [char] (not list(char)) keeps
    # a multi-character or empty filler as exactly one cell per index level.
    padding = [[char] * df.index.nlevels for _ in range(df.columns.nlevels)]

    output = [left + right for left, right in zip(padding + index, heading + data)]
    if convert_to_string:
        output = [[str(cell) for cell in row] for row in output]
    return output
17 changes: 14 additions & 3 deletions fpdf/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .util import Padding

DEFAULT_HEADINGS_STYLE = FontFace(emphasis="BOLD")
DEFAULT_INDEX_STYLE = FontFace(emphasis="BOLD")


class Table:
Expand All @@ -32,6 +33,7 @@ def __init__(
gutter_height=0,
gutter_width=0,
headings_style=DEFAULT_HEADINGS_STYLE,
index_style=DEFAULT_INDEX_STYLE,
Copy link
Member

@Lucas-C Lucas-C Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New parameters should always be added at the end of the parameters list, otherwise the code of existing fpdf2 users would break if they are currently passing parameters positionally, up to line_height for example there:

align = "CENTER"
v_align = "MIDDLE"
borders_layout = TableBordersLayout.ALL
cell_fill_color = None
cell_fill_mode = TableCellFillMode.NONE
col_widths = None
first_row_as_headings = True
gutter_height = 0
gutter_width = 0
headings_style = DEFAULT_HEADINGS_STYLE
line_height = None

with pdf.table(align, v_align, borders_layout, cell_fill_color, cell_fill_mode, col_widths, first_row_as_headings, gutter_height, gutter_width, headings_style, line_height) as table:
    ...  # this code would break after merging this PR, because line_height would be passed to index_style

line_height=None,
markdown=False,
text_align="JUSTIFY",
Expand All @@ -40,6 +42,7 @@ def __init__(
padding=None,
outer_border_width=None,
num_heading_rows=1,
num_index_columns=0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that those 2 new parameters are only required for rendering pandas dataframes, I think they should not be added there but in PandasMixin, even if that means overriding some Table methods in PandasMixin 🙂

):
"""
Args:
Expand All @@ -58,6 +61,8 @@ def __init__(
gutter_width (float): optional horizontal space between columns
headings_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the top headings row: size, color, emphasis...
index_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the index columns: size, color, emphasis...
line_height (number): optional. Defines how much vertical space a line of text will occupy
markdown (bool): optional, default to False. Enable markdown interpretation of cells textual content
text_align (str, fpdf.enums.Align, tuple): optional, default to JUSTIFY. Control text alignment inside cells.
Expand All @@ -72,6 +77,7 @@ def __init__(
num_heading_rows (number): optional. Sets the number of heading rows, default value is 1. If this value is not 1,
first_row_as_headings needs to be True if num_heading_rows>1 and False if num_heading_rows=0. For backwards compatibility,
first_row_as_headings is used in case num_heading_rows is 1.
num_index_columns (number): optional. Sets the number of leading columns styled as index columns, default value is 0.
"""
self._fpdf = fpdf
self._align = align
Expand All @@ -85,12 +91,14 @@ def __init__(
self._gutter_height = gutter_height
self._gutter_width = gutter_width
self._headings_style = headings_style
self._index_style = index_style
self._line_height = 2 * fpdf.font_size if line_height is None else line_height
self._markdown = markdown
self._text_align = text_align
self._width = fpdf.epw if width is None else width
self._wrapmode = wrapmode
self._num_heading_rows = num_heading_rows
self.num_index_columns = num_index_columns
self._initial_style = None
self.rows = []

Expand Down Expand Up @@ -129,13 +137,16 @@ def __init__(
self.row(row)

def row(self, cells=(), style=None):
"Adds a row to the table. Yields a `Row` object."
"Adds a row to the table. Yields a `Row` object. Styles first `self.num_index_columns` cells with `self.index_style`"
if self._initial_style is None:
self._initial_style = self._fpdf.font_face()
row = Row(self, style=style)
self.rows.append(row)
for cell in cells:
row.cell(cell)
for n, cell in enumerate(cells):
if n < self.num_index_columns:
row.cell(cell, style=self._index_style)
else:
row.cell(cell)
return row

def render(self):
Expand Down
Binary file added test/table/table_pandas_multiheading.pdf
Binary file not shown.
Binary file added test/table/table_pandas_multiindex.pdf
Binary file not shown.
20 changes: 20 additions & 0 deletions test/table/test_table.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
from pathlib import Path
import pandas as pd

import pytest

from fpdf import FPDF, FPDFException
from fpdf.adapters.table_pandas import FPDF_pandas
from fpdf.drawing import DeviceRGB
from fpdf.fonts import FontFace
from test.conftest import assert_pdf_equal, LOREM_IPSUM
Expand Down Expand Up @@ -37,6 +39,13 @@
("3", "4", "5", "6", "7", "8"),
)

# Input for test_pandas_multi_label: a dict of dicts keyed by 2-tuples, passed
# to pd.DataFrame() both as-is and transposed.
# NOTE(review): the tuple keys presumably become two-level (MultiIndex) column
# labels, and the inner keys the row labels — confirm against pandas.
MULTI_LABEL_TABLE_DATA = {
("tall", "fat"): {"color": "red", "number": 7, "happy": False},
("short", "fat"): {"color": "green", "number": 8, "happy": True},
("tall", "lean"): {"color": "blue", "number": 9, "happy": True},
("short", "lean"): {"color": "yellow", "number": 15, "happy": False},
}


def test_table_simple(tmp_path):
pdf = FPDF()
Expand Down Expand Up @@ -86,6 +95,17 @@ def test_table_with_syntactic_sugar(tmp_path):
table.row(TABLE_DATA[4])
assert_pdf_equal(pdf, HERE / "table_simple.pdf", tmp_path)

def test_pandas_multi_label(tmp_path):
    """Render MULTI_LABEL_TABLE_DATA as-is and transposed, comparing each PDF to its reference file."""
    frame = pd.DataFrame(MULTI_LABEL_TABLE_DATA)
    # Reference file suffix -> DataFrame rendered for that case.
    cases = {"heading": frame, "index": frame.T}
    for i, case_df in cases.items():
        pdf = FPDF_pandas()
        pdf.add_page()
        pdf.set_font("Times", size=10)
        pdf.dataframe(
            case_df, borders_layout="MINIMAL", text_align="CENTER", width=160
        )
        assert_pdf_equal(pdf, HERE / f"table_pandas_multi{i}.pdf", tmp_path)


def test_table_with_fixed_col_width(tmp_path):
pdf = FPDF()
Expand Down
Loading