Generating PDF Reports Dynamically
Manually assembling PDFs breaks the moment the data changes. Dynamic generation keeps the layout fixed and feeds fresh data in at render time — one script, many documents. This guide covers the two main Python stacks (ReportLab for canvas-level control, Jinja2+WeasyPrint for HTML-to-PDF), then the practical problems: headers/footers, multi-page tables, embedded charts, and Unicode fonts.
For broader context see Automating PDF Extraction & Generation. If you also need to assemble separately generated PDFs into one deliverable, the merge step is in Merging and Splitting PDF Documents.
Prerequisites
# System deps (WeasyPrint needs Cairo + Pango)
# Debian/Ubuntu: sudo apt install libcairo2 libpango-1.0-0 libpangocairo-1.0-0
# macOS: brew install cairo pango
pip install reportlab weasyprint jinja2 pandas matplotlib
Create a test data file:
mkdir -p reports data
python - <<'EOF'
import csv

# Three sample customers; the columns are the ones the Step 1 loader requires.
rows = [
    {"customer": "Acme Corp", "region": "North", "revenue": 82000, "costs": 54000},
    {"customer": "Beta Ltd", "region": "South", "revenue": 61000, "costs": 41000},
    {"customer": "Gamma Inc", "region": "East", "revenue": 74000, "costs": 49000},
]

# newline="" leaves line endings to the csv module. The explicit UTF-8 encoding
# matches the encoding the loader passes to pandas.read_csv, so non-ASCII
# customer names round-trip on any platform.
with open("data/sales.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
    w.writeheader()
    w.writerows(rows)
EOF
Step 1 — Inspect and normalise input data
Load and validate before passing to any renderer. Garbage in, garbage PDF out.
# pip install pandas
from pathlib import Path
import pandas as pd
DATA = Path("data/sales.csv")
def load_report_data(path: Path) -> list[dict]:
    """Load the sales CSV and return one dict per row with derived columns.

    Column names are stripped and lower-cased before validation. Two columns
    are added: ``profit`` (revenue - costs) and ``margin`` (profit as a
    percentage of revenue, rounded to one decimal place).

    Args:
        path: Location of the CSV file, read as UTF-8.

    Returns:
        A list of row dicts as produced by ``DataFrame.to_dict("records")``.

    Raises:
        SystemExit: If the file does not exist, a required column is missing,
            or anything else goes wrong while loading.
    """
    try:
        df = pd.read_csv(path, encoding="utf-8")
        df.columns = df.columns.str.strip().str.lower()
        required = {"customer", "region", "revenue", "costs"}
        missing = required - set(df.columns)
        if missing:
            raise ValueError(f"Missing columns: {missing}")
        df["profit"] = df["revenue"] - df["costs"]
        # Dividing by a zero revenue yields inf/NaN, which would be printed
        # verbatim in the report. Blank those denominators out and report a
        # 0.0 margin for exactly those rows instead.
        zero_revenue = df["revenue"] == 0
        margin = (df["profit"] / df["revenue"].mask(zero_revenue) * 100).round(1)
        df["margin"] = margin.mask(zero_revenue, 0.0)
        return df.to_dict("records")
    except FileNotFoundError as exc:
        raise SystemExit(f"Data file not found: {path}") from exc
    except Exception as exc:
        raise SystemExit(f"Data load failed: {exc}") from exc
rows = load_report_data(DATA)
Data from pandas-based pipelines drops straight into this loader — the same normalisation pattern applies.
The template-to-PDF flow
Step 2 — Jinja2 + WeasyPrint: HTML-to-PDF path
Best when you want CSS layout, responsive tables, and page headers/footers via @page rules. Unicode fonts work out of the box if you declare them in @font-face.
# pip install weasyprint jinja2
from pathlib import Path
from jinja2 import Environment, BaseLoader
from weasyprint import HTML, CSS
import io
# Jinja2 source for the HTML that WeasyPrint turns into the report.
# Text fields that come from the CSV or the caller are passed through the
# ``e`` (escape) filter wherever they land in the HTML body, so a value
# containing ``<`` or ``&`` is rendered as text rather than parsed as markup.
# The ``{{ period }}`` inside the @page rule is left unfiltered on purpose:
# it sits in a CSS string within <style>, where HTML entities are not decoded.
TEMPLATE_SRC = """<!DOCTYPE html>
<html><head><meta charset="utf-8">
<style>
@page {
size: A4;
margin: 20mm 15mm 25mm;
@top-center { content: "Sales Report — {{ period }}"; font-size: 9pt; color: #475569; }
@bottom-right { content: "Page " counter(page) " of " counter(pages); font-size: 8pt; color: #475569; }
}
body { font-family: sans-serif; font-size: 10pt; color: #0f172a; }
h1 { font-size: 18pt; margin-bottom: 4mm; }
table { width: 100%; border-collapse: collapse; margin-top: 6mm; }
th { background: #2563eb; color: #fff; padding: 5px 8px; text-align: left; font-size: 9pt; }
td { padding: 4px 8px; border-bottom: 1px solid #e2e8f0; font-size: 9pt; }
tr { page-break-inside: avoid; }
.right { text-align: right; }
.summary { margin-top: 8mm; font-weight: bold; }
</style></head>
<body>
<h1>Sales Performance</h1>
<p>Period: {{ period|e }}</p>
<table>
<thead><tr>
<th>Customer</th><th>Region</th>
<th class="right">Revenue</th><th class="right">Costs</th>
<th class="right">Profit</th><th class="right">Margin %</th>
</tr></thead>
<tbody>
{% for r in rows %}
<tr>
<td>{{ r.customer|e }}</td><td>{{ r.region|e }}</td>
<td class="right">${{ "{:,.0f}".format(r.revenue) }}</td>
<td class="right">${{ "{:,.0f}".format(r.costs) }}</td>
<td class="right">${{ "{:,.0f}".format(r.profit) }}</td>
<td class="right">{{ r.margin }}%</td>
</tr>
{% endfor %}
</tbody>
</table>
<p class="summary">Total revenue: ${{ "{:,.0f}".format(rows|sum(attribute="revenue")) }}</p>
</body></html>"""
def render_weasyprint(rows: list[dict], period: str, out: Path) -> None:
    """Render TEMPLATE_SRC with Jinja2 and write the result to *out* as a PDF.

    Args:
        rows: Row dicts exposing the fields the template reads (customer,
            region, revenue, costs, profit, margin).
        period: Label shown in the page header and the "Period:" line.
        out: Destination PDF path; missing parent directories are created.

    Raises:
        RuntimeError: If WeasyPrint fails to write the PDF; the original
            exception is chained.
    """
    env = Environment(loader=BaseLoader())
    tmpl = env.from_string(TEMPLATE_SRC)
    html_str = tmpl.render(rows=rows, period=period)
    # render_reportlab creates its output directory; do the same here so
    # either renderer can be pointed at a fresh location.
    out.parent.mkdir(parents=True, exist_ok=True)
    try:
        HTML(string=html_str).write_pdf(str(out))
    except Exception as exc:
        raise RuntimeError(f"WeasyPrint render failed: {exc}") from exc
    print(f"Written: {out}")
render_weasyprint(rows, "Q3 2026", Path("reports/sales_weasyprint.pdf"))
Step 3 — ReportLab: canvas-level control
Use ReportLab when you need exact coordinate placement, vector graphics, or custom fonts for symbols like € or ™. If you hit garbled boxes or a UnicodeEncodeError, see Fix ReportLab Unicode Font Errors — the short answer is to register a TrueType font with pdfmetrics.registerFont.
# pip install reportlab
from pathlib import Path
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib import colors
from reportlab.platypus import (
SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
)
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_RIGHT
OUT = Path("reports/sales_reportlab.pdf")
def render_reportlab(rows: list[dict], period: str, out: Path) -> None:
    """Build the sales report with ReportLab Platypus and write it to *out*.

    Args:
        rows: Row dicts with ``customer``, ``region``, ``revenue``, ``costs``,
            ``profit`` and ``margin`` keys.
        period: Label shown in the title and the running page header.
        out: Destination PDF path; missing parent directories are created.

    Raises:
        RuntimeError: If ``doc.build`` fails; the original exception is chained.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(
        str(out), pagesize=A4,
        leftMargin=15*mm, rightMargin=15*mm,
        topMargin=20*mm, bottomMargin=20*mm,
    )

    def _header_footer(canvas, doc):
        """Draw the running header (top left) and page number (bottom right)."""
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.setFillColor(colors.HexColor("#475569"))
        canvas.drawString(15*mm, A4[1] - 12*mm, f"Sales Report — {period}")
        canvas.drawRightString(A4[0] - 15*mm, 10*mm, f"Page {doc.page}")
        canvas.restoreState()

    header = [["Customer", "Region", "Revenue", "Costs", "Profit", "Margin %"]]
    data_rows = [
        [r["customer"], r["region"],
         f"${r['revenue']:,.0f}", f"${r['costs']:,.0f}",
         f"${r['profit']:,.0f}", f"{r['margin']}%"]
        for r in rows
    ]
    table_data = header + data_rows
    col_widths = [50*mm, 30*mm, 28*mm, 28*mm, 28*mm, 22*mm]
    # repeatRows=1 repeats the header row on every page the table spans.
    tbl = Table(table_data, colWidths=col_widths, repeatRows=1)
    tbl.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#2563eb")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        # Zebra striping for the body rows (row 0 is the header).
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f6f8fb")]),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        # Numeric columns (index 2 onwards) are right-aligned, header included.
        ("ALIGN", (2, 0), (-1, -1), "RIGHT"),
    ]))
    story = [
        Paragraph(f"Sales Performance — {period}", styles["h1"]),
        Spacer(1, 6*mm),
        tbl,
    ]
    try:
        doc.build(story, onFirstPage=_header_footer, onLaterPages=_header_footer)
    except Exception as exc:
        raise RuntimeError(f"ReportLab build failed: {exc}") from exc
    print(f"Written: {out}")
render_reportlab(rows, "Q3 2026", OUT)
Step 4 — Embed a Matplotlib chart
Charts go in as BytesIO images. In the ReportLab path use Image; in the WeasyPrint path encode as base64 and inject into an <img> tag.
# pip install reportlab matplotlib
import io
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from reportlab.platypus import Image
def bar_chart_image(rows: list[dict], width_pts: float = 400, height_pts: float = 180):
    """Return a ReportLab Image flowable with a revenue-vs-costs bar chart.

    Args:
        rows: Row dicts with ``customer``, ``revenue`` and ``costs`` keys.
        width_pts: Width of the flowable in points; the figure is sized to
            match (points / 72 = inches).
        height_pts: Height of the flowable in points.

    Returns:
        An ``Image`` flowable backed by an in-memory PNG rendered at 150 dpi.
    """
    labels = [r["customer"] for r in rows]
    revenue = [r["revenue"] for r in rows]
    costs = [r["costs"] for r in rows]
    fig, ax = plt.subplots(figsize=(width_pts / 72, height_pts / 72))
    try:
        x = range(len(labels))
        # Offset the two series around each tick so the bars sit side by side.
        ax.bar([i - 0.2 for i in x], revenue, width=0.35, label="Revenue", color="#2563eb")
        ax.bar([i + 0.2 for i in x], costs, width=0.35, label="Costs", color="#dbeafe")
        ax.set_xticks(list(x))
        ax.set_xticklabels(labels, fontsize=8)
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f"${v/1000:.0f}k"))
        ax.legend(fontsize=8)
        ax.spines[["top", "right"]].set_visible(False)
        # Lay out this figure explicitly rather than pyplot's "current" one.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Close the figure even when plotting or saving fails, so repeated
        # calls in a batch run do not accumulate open figures.
        plt.close(fig)
    buf.seek(0)
    return Image(buf, width=width_pts, height=height_pts)
Edge cases and variants
Dynamic pagination with ReportLab Platypus
SimpleDocTemplate handles page breaks automatically. For tables that span many pages, set repeatRows=1 on the Table to repeat the header row. If you need to split rows manually (e.g. to add a subtotal per page), subclass BaseDocTemplate and override afterPage.
# pip install reportlab
from reportlab.platypus import SimpleDocTemplate, Table
from reportlab.lib.pagesizes import A4
# repeatRows=1 keeps the header on every page
tbl = Table(data, colWidths=col_widths, repeatRows=1)
Multi-section report with cover page
# pip install reportlab
from reportlab.platypus import SimpleDocTemplate, PageBreak, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from pathlib import Path
styles = getSampleStyleSheet()
def build_multi_section(sections: list[dict], out: Path) -> None:
    """Write one titled section per page to *out*.

    Each item of *sections* is shaped like ``{"title": str, "rows": [...]}``;
    only the title is rendered here.

    Raises:
        RuntimeError: If the document build fails; the cause is chained.
    """
    document = SimpleDocTemplate(str(out))
    flowables = []
    for section in sections:
        # Every section after the first starts on a fresh page.
        if flowables:
            flowables.append(PageBreak())
        flowables.append(Paragraph(section["title"], styles["h1"]))
        # ... build table from section["rows"]
    try:
        document.build(flowables)
    except Exception as exc:
        raise RuntimeError(f"Build failed: {exc}") from exc
WeasyPrint with external CSS file
Keep CSS separate for maintainability:
# pip install weasyprint jinja2
from weasyprint import HTML, CSS
from pathlib import Path
html_str = "<html>...</html>" # rendered Jinja2 output
css = CSS(filename=str(Path("templates/report.css")))
try:
HTML(string=html_str).write_pdf("out.pdf", stylesheets=[css])
except Exception as exc:
raise RuntimeError(f"Render failed: {exc}") from exc
Validation
# pip install pypdf
from pathlib import Path
from pypdf import PdfReader
def validate_pdf(path: Path, min_pages: int = 1) -> None:
    """Check that *path* is a readable PDF with enough pages and some text.

    The checks are explicit ``if``/``raise`` statements rather than ``assert``
    so they still run under ``python -O``.

    Args:
        path: PDF file to inspect.
        min_pages: Minimum number of pages the document must contain.

    Raises:
        RuntimeError: If the file cannot be read, has fewer than *min_pages*
            pages, or its first page yields 20 or fewer characters of text.
    """
    try:
        reader = PdfReader(str(path))
        page_count = len(reader.pages)
        # Spot-check the first page only; extract_text() may return None.
        first_text = (reader.pages[0].extract_text() or "") if page_count else ""
    except Exception as exc:
        raise RuntimeError(f"Could not read {path}: {exc}") from exc

    if page_count < min_pages:
        raise RuntimeError(
            f"Validation failed for {path}: "
            f"Expected >= {min_pages} pages, got {page_count}"
        )
    if len(first_text.strip()) <= 20:
        raise RuntimeError(f"Validation failed for {path}: First page appears empty")
    print(f"OK: {path.name} ({page_count} pages)")
validate_pdf(Path("reports/sales_reportlab.pdf"))
validate_pdf(Path("reports/sales_weasyprint.pdf"))
Performance and scale notes
- WeasyPrint builds an in-memory DOM before rendering. At >500 rows per page, memory climbs fast. Chunk data into page-sized batches and use Merging and Splitting PDF Documents to combine the pieces.
- ReportLab Platypus streams flowables and handles large reports better. Still, avoid loading entire DataFrames into Table objects — pre-serialise to plain Python lists first.
- For nightly batch runs, pre-render all Jinja2 templates to HTML strings in parallel (threads are fine — it is CPU-light), then render PDFs sequentially or in a process pool to avoid GIL contention on the rendering step.
- Matplotlib figure creation is not thread-safe; use matplotlib.use("Agg") and create figures inside worker processes, not threads.
Troubleshooting
| Error | Root cause | Fix |
|---|---|---|
| OSError: no library called "cairo-2" was found | WeasyPrint system dependency missing | sudo apt install libcairo2 (Linux) or brew install cairo (macOS) |
| UnicodeEncodeError or garbled boxes in ReportLab | Default Helvetica core font lacks the glyph | Register a TrueType font — see Fix ReportLab Unicode Font Errors |
| Table rows split mid-cell in WeasyPrint | Missing page-break-inside: avoid on <tr> | Add tr { page-break-inside: avoid; } to the stylesheet as a top-level rule (not inside the @page block) |
| LayoutError: Flowable ... too large in ReportLab | A single flowable taller than the page frame | Split the flowable or reduce font size; for tables, chunk rows |
| Headers/footers bleed into content area | Page margin smaller than header/footer height | Increase the @page margin (WeasyPrint) or topMargin/bottomMargin (ReportLab) to accommodate the fixed elements |
Complete working script
# pip install reportlab weasyprint jinja2 pandas matplotlib pypdf
"""gen_reports.py — generate a sales PDF report from a CSV, with chart.
Usage:
python gen_reports.py --data data/sales.csv --out reports/sales.pdf --engine reportlab
python gen_reports.py --data data/sales.csv --out reports/sales.pdf --engine weasyprint
"""
import argparse
import io
import sys
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
from jinja2 import Environment, BaseLoader
from weasyprint import HTML
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
from reportlab.lib.styles import getSampleStyleSheet
from pypdf import PdfReader
# Jinja2 source for the WeasyPrint engine.
# Text fields that come from the CSV or the command line are passed through
# the ``e`` (escape) filter wherever they land in the HTML body, so a value
# containing ``<`` or ``&`` is rendered as text rather than parsed as markup.
# The ``{{ period }}`` inside the @page rule is left unfiltered on purpose:
# it sits in a CSS string within <style>, where HTML entities are not decoded.
WEASYPRINT_TEMPLATE = """<!DOCTYPE html>
<html><head><meta charset="utf-8"><style>
@page { size: A4; margin: 20mm 15mm 25mm;
@top-center { content: "{{ period }}"; font-size: 9pt; color: #475569; }
@bottom-right { content: "Page " counter(page); font-size: 8pt; color: #475569; } }
body { font-family: sans-serif; font-size: 10pt; }
h1 { font-size: 16pt; }
table { width: 100%; border-collapse: collapse; }
th { background: #2563eb; color: #fff; padding: 4px 8px; }
td { padding: 4px 8px; border-bottom: 1px solid #e2e8f0; }
tr { page-break-inside: avoid; }
.right { text-align: right; }
</style></head><body>
<h1>Sales Performance — {{ period|e }}</h1>
<table><thead><tr>
<th>Customer</th><th>Region</th><th class="right">Revenue</th>
<th class="right">Costs</th><th class="right">Profit</th><th class="right">Margin %</th>
</tr></thead><tbody>
{% for r in rows %}
<tr><td>{{ r.customer|e }}</td><td>{{ r.region|e }}</td>
<td class="right">${{ "{:,.0f}".format(r.revenue) }}</td>
<td class="right">${{ "{:,.0f}".format(r.costs) }}</td>
<td class="right">${{ "{:,.0f}".format(r.profit) }}</td>
<td class="right">{{ r.margin }}%</td></tr>
{% endfor %}
</tbody></table></body></html>"""
def load_data(path: Path) -> list[dict]:
    """Load the sales CSV and return one dict per row with derived columns.

    Column names are stripped and lower-cased. ``profit`` (revenue - costs)
    and ``margin`` (profit as a percentage of revenue, one decimal place) are
    added to every row.

    Args:
        path: Location of the CSV file, read as UTF-8.

    Returns:
        A list of row dicts as produced by ``DataFrame.to_dict("records")``.

    Raises:
        ValueError: If any of the customer, region, revenue or costs columns
            is missing.
    """
    df = pd.read_csv(path, encoding="utf-8")
    df.columns = df.columns.str.strip().str.lower()
    # Fail here with a readable message instead of a bare KeyError below or a
    # half-empty table at render time.
    missing = {"customer", "region", "revenue", "costs"} - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {sorted(missing)}")
    df["profit"] = df["revenue"] - df["costs"]
    # Dividing by a zero revenue yields inf/NaN, which would be printed
    # verbatim in the report; report a 0.0 margin for exactly those rows.
    zero_revenue = df["revenue"] == 0
    margin = (df["profit"] / df["revenue"].mask(zero_revenue) * 100).round(1)
    df["margin"] = margin.mask(zero_revenue, 0.0)
    return df.to_dict("records")
def make_chart(rows: list[dict]) -> io.BytesIO:
    """Render a revenue-vs-costs bar chart and return it as an in-memory PNG.

    Args:
        rows: Row dicts with ``customer``, ``revenue`` and ``costs`` keys.

    Returns:
        A ``BytesIO`` positioned at offset 0 holding the PNG (150 dpi).
    """
    labels = [r["customer"] for r in rows]
    fig, ax = plt.subplots(figsize=(5, 2.5))
    try:
        x = range(len(labels))
        # Offset the two series around each tick so the bars sit side by side.
        ax.bar([i - 0.2 for i in x], [r["revenue"] for r in rows], 0.35,
               label="Revenue", color="#2563eb")
        ax.bar([i + 0.2 for i in x], [r["costs"] for r in rows], 0.35,
               label="Costs", color="#dbeafe")
        ax.set_xticks(list(x))
        ax.set_xticklabels(labels, fontsize=8)
        ax.legend(fontsize=8)
        ax.spines[["top", "right"]].set_visible(False)
        # Lay out this figure explicitly rather than pyplot's "current" one.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Close the figure even when plotting or saving fails, so repeated
        # calls in a batch run do not accumulate open figures.
        plt.close(fig)
    buf.seek(0)
    return buf
def render_weasyprint(rows, period, out: Path) -> None:
    """Fill WEASYPRINT_TEMPLATE with *rows* and *period* and write the PDF to *out*."""
    environment = Environment(loader=BaseLoader())
    template = environment.from_string(WEASYPRINT_TEMPLATE)
    markup = template.render(rows=rows, period=period)
    HTML(string=markup).write_pdf(str(out))
def render_reportlab(rows, period, out: Path) -> None:
    """Write the chart-plus-table sales report to *out* using ReportLab.

    The story is: title, revenue/costs chart, then the data table. A running
    header and page number are drawn on every page.
    """
    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(
        str(out),
        pagesize=A4,
        leftMargin=15*mm,
        rightMargin=15*mm,
        topMargin=20*mm,
        bottomMargin=20*mm,
    )

    def _hf(canvas, doc):
        """Draw the header text (top left) and page number (bottom right)."""
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.setFillColor(colors.HexColor("#475569"))
        canvas.drawString(15*mm, A4[1]-12*mm, f"Sales Report — {period}")
        canvas.drawRightString(A4[0]-15*mm, 10*mm, f"Page {doc.page}")
        canvas.restoreState()

    # Header row first, then one formatted row per record.
    table_rows = [["Customer", "Region", "Revenue", "Costs", "Profit", "Margin %"]]
    for record in rows:
        table_rows.append([
            record["customer"],
            record["region"],
            f"${record['revenue']:,.0f}",
            f"${record['costs']:,.0f}",
            f"${record['profit']:,.0f}",
            f"{record['margin']}%",
        ])

    widths = [50*mm, 30*mm, 28*mm, 28*mm, 28*mm, 22*mm]
    tbl = Table(table_rows, colWidths=widths, repeatRows=1)
    style_commands = [
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#2563eb")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        ("ALIGN", (2, 0), (-1, -1), "RIGHT"),
    ]
    tbl.setStyle(TableStyle(style_commands))

    chart_png = make_chart(rows)
    title = Paragraph(f"Sales Performance — {period}", styles["h1"])
    chart = Image(chart_png, width=360, height=180)
    story = [title, Spacer(1, 4*mm), chart, Spacer(1, 4*mm), tbl]
    doc.build(story, onFirstPage=_hf, onLaterPages=_hf)
def validate(path: Path) -> None:
    """Confirm that *path* opens as a PDF and contains at least one page.

    Raises:
        AssertionError: If the PDF has no pages. Raised explicitly rather than
            via an ``assert`` statement so the check still runs under
            ``python -O``.
    """
    r = PdfReader(str(path))
    page_count = len(r.pages)
    if page_count < 1:
        raise AssertionError("PDF has no pages")
    print(f"OK: {path.name} ({page_count} page(s))")
def main():
    """Parse the command line, render the report, then validate the PDF.

    Each stage exits the process with its own message ("Data load error",
    "Render error", "Validation error") so a failure is attributed to the
    step that actually failed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--data", required=True)
    ap.add_argument("--out", required=True)
    ap.add_argument("--engine", choices=["reportlab", "weasyprint"], default="reportlab")
    ap.add_argument("--period", default="Q3 2026")
    args = ap.parse_args()

    data_path = Path(args.data)
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        rows = load_data(data_path)
    except Exception as exc:
        sys.exit(f"Data load error: {exc}")

    render = render_weasyprint if args.engine == "weasyprint" else render_reportlab
    try:
        render(rows, args.period, out_path)
    except Exception as exc:
        sys.exit(f"Render error: {exc}")

    # Kept separate from rendering so a validation failure is not reported
    # as a render failure.
    try:
        validate(out_path)
    except Exception as exc:
        sys.exit(f"Validation error: {exc}")
if __name__ == "__main__":
main()
Pages in this section
- Create Dynamic Invoice PDFs Automatically — per-customer invoices with line-item loops, tax calculation, and totals
- Fix ReportLab Unicode Font Errors — garbled boxes or UnicodeEncodeError for €, ™, accented characters
Related
- Automating PDF Extraction & Generation — parent guide covering the full PDF automation stack
- Merging and Splitting PDF Documents — assemble per-customer PDFs into one batch deliverable
- Automating Excel Report Generation — same report logic when the output is an Excel workbook instead of a PDF
- Cleaning Messy CSV Data with pandas — prepare the data layer before feeding it to any renderer