Generating PDF Reports Dynamically

Manually assembling PDFs breaks the moment the data changes. Dynamic generation keeps the layout fixed and feeds fresh data in at render time — one script, many documents. This guide covers the two main Python stacks (ReportLab for canvas-level control, Jinja2+WeasyPrint for HTML-to-PDF), then the practical problems: headers/footers, multi-page tables, embedded charts, and Unicode fonts.

For broader context see Automating PDF Extraction & Generation. If you also need to assemble separately generated PDFs into one deliverable, the merge step is in Merging and Splitting PDF Documents.

Prerequisites

# System deps (WeasyPrint needs Cairo + Pango)
# Debian/Ubuntu: sudo apt install libcairo2 libpango-1.0-0 libpangocairo-1.0-0
# macOS: brew install cairo pango

pip install reportlab weasyprint jinja2 pandas matplotlib

Create a test data file:

# Create the output and input directories, then write a three-row sample CSV.
mkdir -p reports data
python - <<'EOF'
import csv

rows = [
    {"customer":"Acme Corp","region":"North","revenue":82000,"costs":54000},
    {"customer":"Beta Ltd","region":"South","revenue":61000,"costs":41000},
    {"customer":"Gamma Inc","region":"East","revenue":74000,"costs":49000},
]
# Write UTF-8 explicitly: the loader in Step 1 reads the file as UTF-8, so the
# two stay in agreement even if non-ASCII customer names are added later.
with open("data/sales.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=rows[0].keys())
    w.writeheader()
    w.writerows(rows)
EOF

Step 1 — Inspect and normalise input data

Load and validate before passing to any renderer. Garbage in, garbage PDF out.

# pip install pandas
from pathlib import Path
import pandas as pd

DATA = Path("data/sales.csv")

def load_report_data(path: Path) -> list[dict]:
    """Load the sales CSV at *path* and return its rows as dictionaries.

    Column headers are stripped of surrounding whitespace and lower-cased.
    The ``customer``, ``region``, ``revenue`` and ``costs`` columns must be
    present. Two derived columns are appended: ``profit`` (revenue minus
    costs) and ``margin`` (profit as a percentage of revenue, rounded to one
    decimal place).

    Raises:
        SystemExit: if the file does not exist, or if reading or normalising
            it fails for any other reason (including missing columns).
    """
    try:
        frame = pd.read_csv(path, encoding="utf-8")
        frame.columns = frame.columns.str.strip().str.lower()

        # Fail early with a readable message rather than a KeyError below.
        expected = {"customer", "region", "revenue", "costs"}
        missing = expected - set(frame.columns)
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        profit = frame["revenue"] - frame["costs"]
        frame["profit"] = profit
        frame["margin"] = (profit / frame["revenue"] * 100).round(1)
        records = frame.to_dict("records")
        return records
    except FileNotFoundError:
        raise SystemExit(f"Data file not found: {path}")
    except Exception as failure:
        raise SystemExit(f"Data load failed: {failure}")

rows = load_report_data(DATA)

Data from pandas-based pipelines drops straight into this loader — the same normalisation pattern applies.

The template-to-PDF flow

Figure: Template → Data → Render → Assemble flow. Raw data moves from the data source (CSV / DB / API) through normalisation (pandas / dicts) into one of two rendering paths — Jinja2 template → HTML → WeasyPrint, or a ReportLab canvas (platypus / drawString) — to produce the PDF output, which can optionally pass through a merge / split step to assemble the final PDF.

Step 2 — Jinja2 + WeasyPrint: HTML-to-PDF path

Best when you want CSS layout, responsive tables, and page headers/footers via @page rules. Unicode text renders correctly as long as a font that covers the glyphs is installed on the system or declared with @font-face.

# pip install weasyprint jinja2
from pathlib import Path
from jinja2 import Environment, BaseLoader
from weasyprint import HTML, CSS
import io

# Jinja2 source for the HTML sales report.
# Placeholders: `period` (shown in the running page header and the intro
# paragraph) and `rows` (records exposing customer, region, revenue, costs,
# profit and margin). The @page rule sets A4 size, the margins and the
# running header/footer; the `tr` rule asks the renderer not to split a
# table row across pages.
TEMPLATE_SRC = """<!DOCTYPE html>
<html><head><meta charset="utf-8">
<style>
@page {
  size: A4;
  margin: 20mm 15mm 25mm;
  @top-center { content: "Sales Report — {{ period }}"; font-size: 9pt; color: #475569; }
  @bottom-right { content: "Page " counter(page) " of " counter(pages); font-size: 8pt; color: #475569; }
}
body { font-family: sans-serif; font-size: 10pt; color: #0f172a; }
h1 { font-size: 18pt; margin-bottom: 4mm; }
table { width: 100%; border-collapse: collapse; margin-top: 6mm; }
th { background: #2563eb; color: #fff; padding: 5px 8px; text-align: left; font-size: 9pt; }
td { padding: 4px 8px; border-bottom: 1px solid #e2e8f0; font-size: 9pt; }
tr { page-break-inside: avoid; }
.right { text-align: right; }
.summary { margin-top: 8mm; font-weight: bold; }
</style></head>
<body>
<h1>Sales Performance</h1>
<p>Period: {{ period }}</p>
<table>
  <thead><tr>
    <th>Customer</th><th>Region</th>
    <th class="right">Revenue</th><th class="right">Costs</th>
    <th class="right">Profit</th><th class="right">Margin %</th>
  </tr></thead>
  <tbody>
  {% for r in rows %}
  <tr>
    <td>{{ r.customer }}</td><td>{{ r.region }}</td>
    <td class="right">${{ "{:,.0f}".format(r.revenue) }}</td>
    <td class="right">${{ "{:,.0f}".format(r.costs) }}</td>
    <td class="right">${{ "{:,.0f}".format(r.profit) }}</td>
    <td class="right">{{ r.margin }}%</td>
  </tr>
  {% endfor %}
  </tbody>
</table>
<p class="summary">Total revenue: ${{ "{:,.0f}".format(rows|sum(attribute="revenue")) }}</p>
</body></html>"""

def render_weasyprint(rows: list[dict], period: str, out: Path) -> None:
    """Render the sales report via Jinja2 + WeasyPrint and write it to *out*.

    Args:
        rows: report records (customer, region, revenue, costs, profit,
            margin) as produced by the data loader.
        period: label for the reporting period, shown in the header and body.
        out: destination PDF path; its parent directory is created if needed.

    Raises:
        RuntimeError: if WeasyPrint fails to render or write the PDF.
    """
    # Autoescape so that data values containing <, > or & (e.g. a customer
    # called "A<B & Sons") are rendered as text instead of corrupting the
    # generated HTML. NOTE: `period` is also escaped where it appears inside
    # the @page CSS string, so keep it free of HTML-special characters.
    env = Environment(loader=BaseLoader(), autoescape=True)
    tmpl = env.from_string(TEMPLATE_SRC)
    html_str = tmpl.render(rows=rows, period=period)

    # Mirror the ReportLab renderer: never fail just because the output
    # directory has not been created yet.
    out.parent.mkdir(parents=True, exist_ok=True)
    try:
        HTML(string=html_str).write_pdf(str(out))
        print(f"Written: {out}")
    except Exception as exc:
        raise RuntimeError(f"WeasyPrint render failed: {exc}") from exc

render_weasyprint(rows, "Q3 2026", Path("reports/sales_weasyprint.pdf"))

Step 3 — ReportLab: canvas-level control

Use ReportLab when you need exact coordinate placement, vector graphics, or custom fonts for symbols such as ✓ or ₹ that the built-in fonts lack. If you hit garbled boxes or a UnicodeEncodeError, see Fix ReportLab Unicode Font Errors — the short answer is to register a TrueType font with pdfmetrics.registerFont.

# pip install reportlab
from pathlib import Path
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib import colors
from reportlab.platypus import (
    SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
)
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_RIGHT

OUT = Path("reports/sales_reportlab.pdf")

def render_reportlab(rows: list[dict], period: str, out: Path) -> None:
    """Build the sales report with ReportLab Platypus and write it to *out*.

    The document is A4 with 15 mm side margins and 20 mm top/bottom margins,
    a running header/footer drawn on every page, a title paragraph and one
    table whose header row repeats on each page.

    Args:
        rows: report records with customer, region, revenue, costs, profit
            and margin keys.
        period: label for the reporting period, shown in header and title.
        out: destination PDF path; its parent directory is created if needed.

    Raises:
        RuntimeError: if ReportLab fails while building the document.
    """
    # Local import: Paragraph text is parsed as XML-like markup, so dynamic
    # text must be escaped before it is interpolated.
    from xml.sax.saxutils import escape

    out.parent.mkdir(parents=True, exist_ok=True)
    styles = getSampleStyleSheet()

    doc = SimpleDocTemplate(
        str(out), pagesize=A4,
        leftMargin=15*mm, rightMargin=15*mm,
        topMargin=20*mm, bottomMargin=20*mm,
    )

    def _header_footer(canvas, doc):
        # Drawn directly on the canvas, outside the flowable frame.
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.setFillColor(colors.HexColor("#475569"))
        canvas.drawString(15*mm, A4[1] - 12*mm, f"Sales Report — {period}")
        canvas.drawRightString(A4[0] - 15*mm, 10*mm, f"Page {doc.page}")
        canvas.restoreState()

    header = [["Customer", "Region", "Revenue", "Costs", "Profit", "Margin %"]]
    data_rows = [
        [r["customer"], r["region"],
         f"${r['revenue']:,.0f}", f"${r['costs']:,.0f}",
         f"${r['profit']:,.0f}", f"{r['margin']}%"]
        for r in rows
    ]
    table_data = header + data_rows

    # Widths total 180 mm = A4 width (210 mm) minus the two 15 mm margins, so
    # the table lines up with the header/footer text instead of spilling
    # into the margins.
    col_widths = [46*mm, 28*mm, 28*mm, 28*mm, 28*mm, 22*mm]
    tbl = Table(table_data, colWidths=col_widths, repeatRows=1)
    tbl.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#2563eb")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f6f8fb")]),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        ("ALIGN", (2, 0), (-1, -1), "RIGHT"),
    ]))

    story = [
        Paragraph(escape(f"Sales Performance — {period}"), styles["h1"]),
        Spacer(1, 6*mm),
        tbl,
    ]
    try:
        doc.build(story, onFirstPage=_header_footer, onLaterPages=_header_footer)
        print(f"Written: {out}")
    except Exception as exc:
        raise RuntimeError(f"ReportLab build failed: {exc}") from exc

render_reportlab(rows, "Q3 2026", OUT)

Step 4 — Embed a Matplotlib chart

Charts go in as BytesIO images. In the ReportLab path use Image; in the WeasyPrint path encode as base64 and inject into an <img> tag.

# pip install reportlab matplotlib
import io
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from reportlab.platypus import Image

def bar_chart_image(rows: list[dict], width_pts: float = 400, height_pts: float = 180):
    """Return a ReportLab Image flowable with a revenue-vs-costs bar chart.

    Args:
        rows: report records; ``customer``, ``revenue`` and ``costs`` are read.
        width_pts: width of the flowable in points (1/72 inch).
        height_pts: height of the flowable in points.

    Returns:
        A ``reportlab.platypus.Image`` backed by an in-memory PNG.
    """
    labels = [r["customer"] for r in rows]
    revenue = [r["revenue"] for r in rows]
    costs = [r["costs"] for r in rows]

    # The figure is sized in inches so its aspect ratio matches the flowable.
    fig, ax = plt.subplots(figsize=(width_pts / 72, height_pts / 72))
    try:
        x = range(len(labels))
        ax.bar([i - 0.2 for i in x], revenue, width=0.35, label="Revenue", color="#2563eb")
        ax.bar([i + 0.2 for i in x], costs, width=0.35, label="Costs", color="#dbeafe")
        ax.set_xticks(list(x))
        ax.set_xticklabels(labels, fontsize=8)
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f"${v/1000:.0f}k"))
        ax.legend(fontsize=8)
        ax.spines[["top", "right"]].set_visible(False)
        # Act on this figure explicitly rather than on pyplot's "current" one.
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # pyplot otherwise keeps it registered.
        plt.close(fig)
    buf.seek(0)
    return Image(buf, width=width_pts, height=height_pts)

Edge cases and variants

Dynamic pagination with ReportLab Platypus

SimpleDocTemplate handles page breaks automatically. For tables that span many pages, set repeatRows=1 on the Table to repeat the header row. If you need to split rows manually (e.g. to add a subtotal per page), subclass BaseDocTemplate and override afterPage.

# pip install reportlab
from reportlab.platypus import SimpleDocTemplate, Table
from reportlab.lib.pagesizes import A4

# repeatRows=1 keeps the header on every page
tbl = Table(data, colWidths=col_widths, repeatRows=1)

Multi-section report with cover page

# pip install reportlab
from reportlab.platypus import SimpleDocTemplate, PageBreak, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from pathlib import Path

styles = getSampleStyleSheet()

def build_multi_section(sections: list[dict], out: Path) -> None:
    """Write a PDF to *out* with one titled section per entry in *sections*.

    Each entry is shaped like ``{"title": str, "rows": [...]}``. Every
    section after the first starts on a new page.

    Raises:
        RuntimeError: if ReportLab fails while building the document.
    """
    document = SimpleDocTemplate(str(out))
    flowables = []
    for section in sections:
        # A non-empty story means at least one section came before this one.
        if flowables:
            flowables.append(PageBreak())
        heading = Paragraph(section["title"], styles["h1"])
        flowables.append(heading)
        # ... build table from section["rows"]
    try:
        document.build(flowables)
    except Exception as exc:
        raise RuntimeError(f"Build failed: {exc}") from exc

WeasyPrint with external CSS file

Keep CSS separate for maintainability:

# pip install weasyprint jinja2
from weasyprint import HTML, CSS
from pathlib import Path

# Placeholder markup; in practice this is the string returned by the Jinja2
# template's render() call.
html_str = "<html>...</html>"  # rendered Jinja2 output
# Load the stylesheet from its own file so layout can change without
# touching the Python code.
css = CSS(filename=str(Path("templates/report.css")))
try:
    # `stylesheets` takes a list of CSS objects to apply to the document.
    HTML(string=html_str).write_pdf("out.pdf", stylesheets=[css])
except Exception as exc:
    # Re-raise with context while keeping the original error as the cause.
    raise RuntimeError(f"Render failed: {exc}") from exc

Validation

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader

def validate_pdf(path: Path, min_pages: int = 1, min_chars: int = 20) -> None:
    """Sanity-check a generated PDF and print a one-line confirmation.

    Args:
        path: PDF file to inspect.
        min_pages: minimum number of pages the document must contain.
        min_chars: the first page must yield more than this many
            non-whitespace-trimmed characters of extractable text.

    Raises:
        RuntimeError: if the file cannot be read, has too few pages, or its
            first page appears to contain no text.
    """
    try:
        reader = PdfReader(str(path))
        page_count = len(reader.pages)
        # Spot-check the first page's text only when there is a first page.
        text = (reader.pages[0].extract_text() or "") if page_count else ""
    except Exception as exc:
        raise RuntimeError(f"Could not read {path}: {exc}") from exc

    # Explicit checks instead of `assert`, which is removed under `python -O`
    # and would silently let a broken PDF through.
    if page_count < min_pages:
        raise RuntimeError(
            f"Validation failed for {path}: "
            f"Expected >= {min_pages} pages, got {page_count}"
        )
    if len(text.strip()) <= min_chars:
        raise RuntimeError(f"Validation failed for {path}: First page appears empty")
    print(f"OK: {path.name} ({page_count} pages)")

validate_pdf(Path("reports/sales_reportlab.pdf"))
validate_pdf(Path("reports/sales_weasyprint.pdf"))

Performance and scale notes

  • WeasyPrint builds an in-memory DOM before rendering. At >500 rows per page, memory climbs fast. Chunk data into page-sized batches and use Merging and Splitting PDF Documents to combine the pieces.
  • ReportLab Platypus streams flowables and handles large reports better. Still, avoid loading entire DataFrames into Table objects — pre-serialise to plain Python lists first.
  • For nightly batch runs, pre-render all Jinja2 templates to HTML strings in parallel (threads are fine — it is CPU-light), then render PDFs sequentially or in a process pool to avoid GIL contention on the rendering step.
  • Matplotlib figure creation is not thread-safe; use matplotlib.use("Agg") and create figures inside worker processes, not threads.

Troubleshooting

| Error | Root cause | Fix |
| --- | --- | --- |
| `OSError: no library called "cairo-2" was found` | WeasyPrint system dependency missing | `sudo apt install libcairo2` (Linux) or `brew install cairo` (macOS) |
| `UnicodeEncodeError` or garbled boxes in ReportLab | Default Helvetica core font lacks the glyph | Register a TrueType font — see Fix ReportLab Unicode Font Errors |
| Table rows split mid-cell in WeasyPrint | Missing `page-break-inside: avoid` on `<tr>` | Add `tr { page-break-inside: avoid; }` to the stylesheet (as a normal rule, outside the `@page` block) |
| `LayoutError: Flowable ... too large` in ReportLab | A single flowable taller than the page frame | Split the flowable or reduce font size; for tables, chunk rows |
| Headers/footers bleed into content area | Page margin smaller than header/footer height | Increase `topMargin`/`bottomMargin` (ReportLab) or the `@page` margin (WeasyPrint) to accommodate the fixed elements |

Complete working script

# pip install reportlab weasyprint jinja2 pandas matplotlib pypdf
"""gen_reports.py — generate a sales PDF report from a CSV, with chart.

Usage:
  python gen_reports.py --data data/sales.csv --out reports/sales.pdf --engine reportlab
  python gen_reports.py --data data/sales.csv --out reports/sales.pdf --engine weasyprint
"""
import argparse
import io
import sys
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
from jinja2 import Environment, BaseLoader
from weasyprint import HTML
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
from reportlab.lib.styles import getSampleStyleSheet
from pypdf import PdfReader

# Jinja2 source for the WeasyPrint engine.
# Placeholders: `period` (page header and <h1>) and `rows` (records exposing
# customer, region, revenue, costs, profit and margin). The @page rule sets
# A4 size, margins and the running header / page-number footer.
WEASYPRINT_TEMPLATE = """<!DOCTYPE html>
<html><head><meta charset="utf-8"><style>
@page { size: A4; margin: 20mm 15mm 25mm;
  @top-center { content: "{{ period }}"; font-size: 9pt; color: #475569; }
  @bottom-right { content: "Page " counter(page); font-size: 8pt; color: #475569; } }
body { font-family: sans-serif; font-size: 10pt; }
h1 { font-size: 16pt; }
table { width: 100%; border-collapse: collapse; }
th { background: #2563eb; color: #fff; padding: 4px 8px; }
td { padding: 4px 8px; border-bottom: 1px solid #e2e8f0; }
tr { page-break-inside: avoid; }
.right { text-align: right; }
</style></head><body>
<h1>Sales Performance — {{ period }}</h1>
<table><thead><tr>
  <th>Customer</th><th>Region</th><th class="right">Revenue</th>
  <th class="right">Costs</th><th class="right">Profit</th><th class="right">Margin %</th>
</tr></thead><tbody>
{% for r in rows %}
<tr><td>{{ r.customer }}</td><td>{{ r.region }}</td>
  <td class="right">${{ "{:,.0f}".format(r.revenue) }}</td>
  <td class="right">${{ "{:,.0f}".format(r.costs) }}</td>
  <td class="right">${{ "{:,.0f}".format(r.profit) }}</td>
  <td class="right">{{ r.margin }}%</td></tr>
{% endfor %}
</tbody></table></body></html>"""


def load_data(path: Path) -> list[dict]:
    """Read the sales CSV at *path* and return one dict per row.

    Headers are stripped and lower-cased, then ``profit`` (revenue - costs)
    and ``margin`` (profit as a percentage of revenue, one decimal place)
    are added to every record.

    Raises:
        ValueError: if any of the customer, region, revenue or costs columns
            is missing from the file.
    """
    df = pd.read_csv(path, encoding="utf-8")
    df.columns = df.columns.str.strip().str.lower()

    # Validate up front so a malformed file yields a clear message naming the
    # missing columns instead of a bare KeyError from the arithmetic below.
    required = {"customer", "region", "revenue", "costs"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {sorted(missing)}")

    df["profit"] = df["revenue"] - df["costs"]
    df["margin"] = (df["profit"] / df["revenue"] * 100).round(1)
    return df.to_dict("records")


def make_chart(rows: list[dict]) -> io.BytesIO:
    """Render a revenue-vs-costs bar chart and return it as an in-memory PNG.

    Args:
        rows: report records; ``customer``, ``revenue`` and ``costs`` are read.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PNG data.
    """
    labels = [r["customer"] for r in rows]
    x = range(len(labels))

    fig, ax = plt.subplots(figsize=(5, 2.5))
    try:
        ax.bar([i - 0.2 for i in x], [r["revenue"] for r in rows], 0.35,
               label="Revenue", color="#2563eb")
        ax.bar([i + 0.2 for i in x], [r["costs"] for r in rows], 0.35,
               label="Costs", color="#dbeafe")
        ax.set_xticks(list(x))
        ax.set_xticklabels(labels, fontsize=8)
        ax.legend(fontsize=8)
        ax.spines[["top", "right"]].set_visible(False)
        # Act on this figure explicitly rather than on pyplot's "current" one.
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Release the figure even if plotting or saving raises.
        plt.close(fig)
    buf.seek(0)
    return buf


def render_weasyprint(rows, period, out: Path) -> None:
    """Render the report through Jinja2 + WeasyPrint and write it to *out*.

    Args:
        rows: report records passed to the template as ``rows``.
        period: reporting-period label passed to the template as ``period``.
        out: destination PDF path.
    """
    # Autoescape so data values containing <, > or & are rendered as text
    # rather than being interpreted as markup in the generated HTML.
    env = Environment(loader=BaseLoader(), autoescape=True)
    tmpl = env.from_string(WEASYPRINT_TEMPLATE)
    HTML(string=tmpl.render(rows=rows, period=period)).write_pdf(str(out))


def render_reportlab(rows, period, out: Path) -> None:
    """Build the report with ReportLab Platypus and write it to *out*.

    The story is: title, bar chart, table. A header and a page-number footer
    are drawn on every page.

    Args:
        rows: report records with customer, region, revenue, costs, profit
            and margin keys.
        period: reporting-period label used in the header and title.
        out: destination PDF path.
    """
    # Local import: Paragraph text is parsed as XML-like markup, so dynamic
    # text must be escaped before it is interpolated.
    from xml.sax.saxutils import escape

    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(str(out), pagesize=A4,
                            leftMargin=15*mm, rightMargin=15*mm,
                            topMargin=20*mm, bottomMargin=20*mm)

    def _hf(canvas, doc):
        # Drawn directly on the canvas, outside the flowable frame.
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.setFillColor(colors.HexColor("#475569"))
        canvas.drawString(15*mm, A4[1]-12*mm, f"Sales Report — {period}")
        canvas.drawRightString(A4[0]-15*mm, 10*mm, f"Page {doc.page}")
        canvas.restoreState()

    header = [["Customer", "Region", "Revenue", "Costs", "Profit", "Margin %"]]
    data = [[r["customer"], r["region"], f"${r['revenue']:,.0f}", f"${r['costs']:,.0f}",
             f"${r['profit']:,.0f}", f"{r['margin']}%"] for r in rows]

    # Widths total 180 mm = A4 width (210 mm) minus the two 15 mm margins, so
    # the table stays inside the text area instead of spilling into the margins.
    col_widths = [46*mm, 28*mm, 28*mm, 28*mm, 28*mm, 22*mm]
    tbl = Table(header + data, colWidths=col_widths, repeatRows=1)
    tbl.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#2563eb")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        ("ALIGN", (2, 0), (-1, -1), "RIGHT"),
    ]))

    chart_buf = make_chart(rows)
    story = [
        Paragraph(escape(f"Sales Performance — {period}"), styles["h1"]),
        Spacer(1, 4*mm),
        # 360 x 180 pt keeps the 2:1 aspect ratio of the rendered chart.
        Image(chart_buf, width=360, height=180),
        Spacer(1, 4*mm),
        tbl,
    ]
    doc.build(story, onFirstPage=_hf, onLaterPages=_hf)


def validate(path: Path) -> None:
    """Confirm that the PDF at *path* opens and has at least one page.

    Prints a one-line summary on success.

    Raises:
        RuntimeError: if the document contains no pages.
    """
    r = PdfReader(str(path))
    page_count = len(r.pages)
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if page_count < 1:
        raise RuntimeError("PDF has no pages")
    print(f"OK: {path.name} ({page_count} page(s))")


def main():
    """Command-line entry point: load the CSV, render the PDF, validate it.

    Exits with an error message if loading the data fails, or if rendering
    or validating the output fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", required=True)
    parser.add_argument("--out", required=True)
    parser.add_argument("--engine", choices=["reportlab","weasyprint"], default="reportlab")
    parser.add_argument("--period", default="Q3 2026")
    opts = parser.parse_args()

    source = Path(opts.data)
    target = Path(opts.out)
    # Make sure the destination directory exists before any engine writes.
    target.parent.mkdir(parents=True, exist_ok=True)

    try:
        rows = load_data(source)
    except Exception as exc:
        sys.exit(f"Data load error: {exc}")

    # argparse restricts --engine to the two known values, so anything that
    # is not "weasyprint" is "reportlab".
    render = render_weasyprint if opts.engine == "weasyprint" else render_reportlab
    try:
        render(rows, opts.period, target)
        validate(target)
    except Exception as exc:
        sys.exit(f"Render error: {exc}")

if __name__ == "__main__":
    main()

Pages in this section

Part of Automating PDF Extraction & Generation.

Explore next