Writing Excel Formulas and Charts with openpyxl
openpyxl lets you write .xlsx files entirely from Python — including live formulas, named ranges, custom number formats, and embedded charts. The tricky parts are understanding what openpyxl actually stores (strings and references, not computed values) and knowing how the Reference/Series API maps onto the chart object model. This guide covers both in full.
Prerequisites
You need Python 3.9+, openpyxl, and a sample data file to experiment with. The snippet below creates one.
# pip install openpyxl
python - <<'EOF'
from pathlib import Path
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Sales"
ws.append(["Month", "Revenue", "Cost"])
rows = [
("Jan", 12000, 8000), ("Feb", 15000, 9500), ("Mar", 13500, 8800),
("Apr", 17000, 10200), ("May", 19500, 11000), ("Jun", 22000, 12500),
]
for r in rows:
ws.append(r)
wb.save(Path("sample_sales.xlsx"))
print("sample_sales.xlsx written")
EOF
Step 1 — Load the workbook and inspect the sheet
Before writing formulas, confirm column layout and data extent.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
print(f"Dimensions: {ws.dimensions}") # e.g. A1:C7
print(f"Max row: {ws.max_row}, Max col: {ws.max_column}")
ws.dimensions returns the bounding box of used cells. Use ws.max_row and ws.max_column to compute formula ranges programmatically instead of hard-coding them.
Step 2 — Write live formulas
Assign a formula string to cell.value. openpyxl stores the string verbatim; Excel evaluates it when the file is opened. See Fix openpyxl Formulas Showing as Blank for what happens when you read the file back with data_only=True.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
last_data_row = ws.max_row # 7 (header on row 1, data rows 2-7)
summary_row = last_data_row + 2 # leave a blank row
# SUM formulas
ws[f"B{summary_row}"] = f"=SUM(B2:B{last_data_row})"
ws[f"C{summary_row}"] = f"=SUM(C2:C{last_data_row})"
# Profit margin column (new column D)
ws["D1"] = "Margin"
for row in range(2, last_data_row + 1):
ws[f"D{row}"] = f"=(B{row}-C{row})/B{row}"
# AVERAGE in summary row
ws[f"D{summary_row}"] = f"=AVERAGE(D2:D{last_data_row})"
# Labels
ws[f"A{summary_row}"] = "Total / Avg"
wb.save(WORKBOOK)
print(f"Formulas written to row {summary_row} and column D.")
Key points:
- Formulas use standard Excel syntax — start every formula with
=. - Use f-strings to keep row references dynamic rather than hard-coding row numbers.
- openpyxl does not calculate formula results; read the file back with
data_only=Falseto retrieve the formula string, or open it in Excel/LibreOffice once to force evaluation.
Step 3 — Named ranges
Named ranges let formulas use Revenue instead of Sales!$B$2:$B$7. Define them via wb.defined_names.
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.workbook.defined_name import DefinedName
from openpyxl.utils import quote_sheetname, absolute_coordinate
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
last_data_row = 7 # adjust if data length changes
# Build an absolute reference string: 'Sales'!$B$2:$B$7
sheet_ref = quote_sheetname(ws.title)
rev_range = absolute_coordinate(f"B2:B{last_data_row}")
cost_range = absolute_coordinate(f"C2:C{last_data_row}")
wb.defined_names["Revenue"] = DefinedName("Revenue", attr_text=f"{sheet_ref}!{rev_range}")
wb.defined_names["Cost"] = DefinedName("Cost", attr_text=f"{sheet_ref}!{cost_range}")
# Now use named ranges in formulas
ws["B10"] = "=SUM(Revenue)"
ws["C10"] = "=SUM(Cost)"
wb.save(WORKBOOK)
print("Named ranges 'Revenue' and 'Cost' defined.")
quote_sheetname wraps the sheet name in single quotes if it contains spaces. absolute_coordinate converts B2:B7 to $B$2:$B$7.
Step 4 — Number formats
Number formats are stored as strings on the cell's number_format attribute. They follow Excel's custom format syntax.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
# Currency for revenue and cost columns
currency_fmt = '"$"#,##0.00'
for row in range(2, ws.max_row + 1):
if ws[f"B{row}"].value is not None:
ws[f"B{row}"].number_format = currency_fmt
if ws[f"C{row}"].value is not None:
ws[f"C{row}"].number_format = currency_fmt
# Percentage for margin column
pct_fmt = "0.0%"
for row in range(2, ws.max_row + 1):
if ws[f"D{row}"].value is not None:
ws[f"D{row}"].number_format = pct_fmt
wb.save(WORKBOOK)
print("Number formats applied.")
Common format strings:
| Purpose | Format string |
|---|---|
| Currency USD | "$"#,##0.00 |
| Percentage | 0.0% |
| Integer with thousands | #,##0 |
| ISO date | YYYY-MM-DD |
| Date + time | YYYY-MM-DD HH:MM:SS |
| Scientific | 0.00E+00 |
Step 5 — BarChart
The chart API always follows the same pattern: create a chart object, create Reference objects for the data and categories, call add_data and set_categories, then call ws.add_chart.
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.chart import BarChart, Reference
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
chart = BarChart()
chart.type = "col" # vertical bars; "bar" = horizontal
chart.grouping = "clustered" # or "stacked", "percentStacked"
chart.title = "Monthly Revenue vs Cost"
chart.y_axis.title = "Amount (USD)"
chart.x_axis.title = "Month"
chart.style = 10 # built-in Excel style 1-48
# Data reference: columns B and C, rows 1-7 (row 1 = header, used as series title)
data_ref = Reference(ws, min_col=2, max_col=3, min_row=1, max_row=ws.max_row)
chart.add_data(data_ref, titles_from_data=True)
# Category labels: column A, rows 2-7 (skip header)
cats_ref = Reference(ws, min_col=1, min_row=2, max_row=ws.max_row)
chart.set_categories(cats_ref)
# Size in EMU (English Metric Units): 1 cm = 360000 EMU
chart.width = 20 # cm
chart.height = 12 # cm
ws.add_chart(chart, "F2") # anchor top-left corner at F2
wb.save(WORKBOOK)
print("BarChart written.")
titles_from_data=True uses the first row of the reference as series names. If your data has no header row, pass titles_from_data=False and set chart.series[i].title manually.
Step 6 — LineChart
LineChart follows identical steps; swap the chart class and adjust style options.
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.chart import LineChart, Reference
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
line = LineChart()
line.title = "Profit Margin Over Time"
line.y_axis.title = "Margin"
line.x_axis.title = "Month"
line.y_axis.numFmt = "0%"
line.smooth = True # curved lines
# Margin column D (no header row offset needed — row 1 IS the header)
data_ref = Reference(ws, min_col=4, min_row=1, max_row=ws.max_row)
line.add_data(data_ref, titles_from_data=True)
cats_ref = Reference(ws, min_col=1, min_row=2, max_row=ws.max_row)
line.set_categories(cats_ref)
line.width = 18
line.height = 10
ws.add_chart(line, "F20")
wb.save(WORKBOOK)
print("LineChart written.")
Setting y_axis.numFmt = "0%" formats axis tick labels as percentages regardless of the cell format.
Step 7 — PieChart
PieChart takes a single data series. There is no category axis — categories become slice labels.
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.chart import PieChart, Reference
from openpyxl.chart.series import DataPoint
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
pie = PieChart()
pie.title = "Revenue by Month"
# Single series: revenue column B, rows 2-7 (no header)
data_ref = Reference(ws, min_col=2, min_row=2, max_row=ws.max_row)
pie.add_data(data_ref)
# Labels from column A
cats_ref = Reference(ws, min_col=1, min_row=2, max_row=ws.max_row)
pie.set_categories(cats_ref)
# Explode the first slice to highlight it
slice0 = DataPoint(idx=0, explosion=10)
pie.series[0].data_points = [slice0]
pie.dataLabels = openpyxl.chart.label.DataLabelList()
pie.dataLabels.showPercent = True
pie.width = 15
pie.height = 12
ws.add_chart(pie, "F35")
wb.save(WORKBOOK)
print("PieChart written.")
DataPoint(idx=0, explosion=10) moves slice 0 outward by 10% for emphasis. showPercent=True adds percentage labels to each slice.
How the pieces fit together
The diagram below shows the data flow from worksheet cells through the openpyxl object model to the embedded chart in the saved workbook.
Step 8 — Styling cells and freezing panes
Header styling
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
header_font = Font(bold=True, color="FFFFFF", size=12)
header_fill = PatternFill("solid", fgColor="2563EB")
center_align = Alignment(horizontal="center", vertical="center")
thin_border = Border(
bottom=Side(style="thin", color="E2E8F0"),
right=Side(style="thin", color="E2E8F0"),
)
for col_idx in range(1, ws.max_column + 1):
cell = ws.cell(row=1, column=col_idx)
cell.font = header_font
cell.fill = header_fill
cell.alignment = center_align
cell.border = thin_border
# Auto-fit column widths (approximate — openpyxl has no native auto-fit)
for col in ws.columns:
max_len = max((len(str(c.value)) for c in col if c.value), default=8)
ws.column_dimensions[col[0].column_letter].width = min(max_len + 4, 40)
wb.save(WORKBOOK)
print("Headers styled.")
Freezing panes
freeze_panes takes the address of the first unfrozen cell. Setting it to "A2" freezes row 1 only; "B2" freezes both the first row and the first column.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
ws.freeze_panes = "A2" # row 1 (header) stays visible when scrolling
wb.save(WORKBOOK)
print("Row 1 frozen.")
To unfreeze: ws.freeze_panes = None.
Edge cases and variants
Multiple chart types on one sheet
Place each chart at a different anchor cell. Cell addresses use Excel notation — if charts overlap, the last one added renders on top.
# pip install openpyxl
from pathlib import Path
import openpyxl
from openpyxl.chart import BarChart, LineChart, Reference
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
last_row = ws.max_row
bar = BarChart()
bar.title = "Revenue vs Cost"
bar.add_data(Reference(ws, min_col=2, max_col=3, min_row=1, max_row=last_row), titles_from_data=True)
bar.set_categories(Reference(ws, min_col=1, min_row=2, max_row=last_row))
bar.width, bar.height = 18, 10
ws.add_chart(bar, "F2")
line = LineChart()
line.title = "Margin Trend"
line.add_data(Reference(ws, min_col=4, min_row=1, max_row=last_row), titles_from_data=True)
line.set_categories(Reference(ws, min_col=1, min_row=2, max_row=last_row))
line.width, line.height = 18, 10
ws.add_chart(line, "F22")
wb.save(WORKBOOK)
print("Two charts written.")
Writing formulas into a new sheet
When generating a report (as covered in Automating Excel Report Generation), create a summary sheet that references data sheets with cross-sheet formulas.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
# Create a summary sheet
if "Summary" not in wb.sheetnames:
wb.create_sheet("Summary")
summary = wb["Summary"]
summary["A1"] = "KPI"
summary["B1"] = "Value"
summary["A2"] = "Total Revenue"
summary["B2"] = "=SUM(Sales!B2:B7)" # cross-sheet reference
summary["A3"] = "Total Cost"
summary["B3"] = "=SUM(Sales!C2:C7)"
summary["A4"] = "Net Profit"
summary["B4"] = "=B2-B3"
summary["A5"] = "Avg Margin"
summary["B5"] = "=AVERAGE(Sales!D2:D7)"
wb.save(WORKBOOK)
print("Summary sheet with cross-sheet formulas written.")
Reading formula results back into Python
If you need formula results in Python rather than displaying them in Excel, compute the values in Python directly — either with pandas before writing, or by reading with Reading Excel Files with Python after Excel has saved cached values.
# pip install openpyxl pandas
from pathlib import Path
import pandas as pd
WORKBOOK = Path("sample_sales.xlsx")
try:
df = pd.read_excel(WORKBOOK, sheet_name="Sales", engine="openpyxl")
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
# Compute margin in Python rather than relying on formula caching
df["Margin"] = (df["Revenue"] - df["Cost"]) / df["Revenue"]
print(df[["Month", "Margin"]].to_string(index=False))
Validation
After writing the file, assert its integrity before treating it as production output.
# pip install openpyxl
from pathlib import Path
import openpyxl
WORKBOOK = Path("sample_sales.xlsx")
try:
wb = openpyxl.load_workbook(WORKBOOK, data_only=False)
except FileNotFoundError as exc:
raise SystemExit(f"File not found: {exc}") from exc
ws = wb["Sales"]
# 1. Check formula strings were stored (not overwritten with None)
margin_formula = ws["D2"].value
assert isinstance(margin_formula, str) and margin_formula.startswith("="), \
f"Expected formula in D2, got: {margin_formula!r}"
# 2. Check named ranges exist
assert "Revenue" in wb.defined_names, "Named range 'Revenue' missing"
assert "Cost" in wb.defined_names, "Named range 'Cost' missing"
# 3. Check at least one chart is embedded
assert len(ws._charts) >= 1, "No charts found in Sales sheet"
# 4. Check freeze pane
assert ws.freeze_panes == "A2", f"Unexpected freeze_panes: {ws.freeze_panes!r}"
print("All checks passed.")
Performance and scale notes
- Large data sets: for sheets with 50 000+ rows, use
openpyxl.Workbook(write_only=True)withws.append()— it streams rows without loading the whole workbook into memory. - Write-only mode cannot embed charts or apply styles after rows are written; write data first, then open with a regular
load_workbookpass to add formatting and charts. - Formula recalculation: openpyxl sets
calcPrto request full recalculation on next open. If your workflow requires pre-calculated values (e.g., for CI pipelines), compute in pandas and write literal values — no Excel required. - File size: embedded charts add ~10–30 KB per chart. Hundreds of charts in one workbook can push file sizes above 10 MB; consider splitting into multiple workbooks.
Troubleshooting
| Symptom | Root cause | Fix |
|---|---|---|
Formula cell reads back as None | Opened with data_only=True; no cached value yet | Read with data_only=False to get formula string, or open once in Excel to cache values |
AttributeError: 'NoneType' object has no attribute 'value' | Cell reference outside used range | Check ws.max_row/ws.max_column before iterating |
| Chart renders but shows no data | Reference row/column bounds wrong or titles_from_data mismatch | Print ws.cell(min_row, min_col).value to verify bounds |
InvalidFileException on load | File opened/locked by Excel | Close the file in Excel, then re-run |
| Named range not visible in Excel Name Box | Scope set to workbook but name conflicts with sheet-level name | Delete conflicting sheet-level name in Excel Name Manager |
PermissionError on save | File is open in Excel | Close Excel first, or save to a temp path then replace |
Complete working script
#!/usr/bin/env python3
"""
Write formulas, named ranges, number formats, and charts into a .xlsx file.
Usage: python excel_formulas_charts.py [--input INPUT] [--output OUTPUT]
"""
# pip install openpyxl
import argparse
from pathlib import Path
import openpyxl
from openpyxl.chart import BarChart, LineChart, Reference
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.workbook.defined_name import DefinedName
from openpyxl.utils import quote_sheetname, absolute_coordinate
def build_sample_workbook(path: Path) -> None:
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Sales"
ws.append(["Month", "Revenue", "Cost"])
for month, rev, cost in [
("Jan", 12000, 8000), ("Feb", 15000, 9500), ("Mar", 13500, 8800),
("Apr", 17000, 10200), ("May", 19500, 11000), ("Jun", 22000, 12500),
]:
ws.append([month, rev, cost])
wb.save(path)
def apply_formulas(ws) -> None:
last = ws.max_row
ws["D1"] = "Margin"
for row in range(2, last + 1):
ws[f"D{row}"] = f"=(B{row}-C{row})/B{row}"
ws[f"D{row}"].number_format = "0.0%"
ws[f"B{row}"].number_format = '"$"#,##0'
ws[f"C{row}"].number_format = '"$"#,##0'
summary = last + 2
ws[f"A{summary}"] = "Total / Avg"
ws[f"B{summary}"] = f"=SUM(B2:B{last})"
ws[f"C{summary}"] = f"=SUM(C2:C{last})"
ws[f"D{summary}"] = f"=AVERAGE(D2:D{last})"
ws[f"B{summary}"].number_format = '"$"#,##0'
ws[f"C{summary}"].number_format = '"$"#,##0'
ws[f"D{summary}"].number_format = "0.0%"
def apply_named_ranges(wb, ws) -> None:
last = ws.max_row
sheet_ref = quote_sheetname(ws.title)
wb.defined_names["Revenue"] = DefinedName(
"Revenue", attr_text=f"{sheet_ref}!{absolute_coordinate(f'B2:B{last}')}"
)
wb.defined_names["Cost"] = DefinedName(
"Cost", attr_text=f"{sheet_ref}!{absolute_coordinate(f'C2:C{last}')}"
)
def style_headers(ws) -> None:
for col in range(1, ws.max_column + 1):
cell = ws.cell(row=1, column=col)
cell.font = Font(bold=True, color="FFFFFF", size=12)
cell.fill = PatternFill("solid", fgColor="2563EB")
cell.alignment = Alignment(horizontal="center")
ws.freeze_panes = "A2"
def add_charts(ws) -> None:
last = ws.max_row
bar = BarChart()
bar.title = "Revenue vs Cost"
bar.type = "col"
bar.add_data(Reference(ws, min_col=2, max_col=3, min_row=1, max_row=last),
titles_from_data=True)
bar.set_categories(Reference(ws, min_col=1, min_row=2, max_row=last))
bar.width, bar.height = 18, 10
ws.add_chart(bar, "F2")
line = LineChart()
line.title = "Margin Trend"
line.smooth = True
line.add_data(Reference(ws, min_col=4, min_row=1, max_row=last),
titles_from_data=True)
line.set_categories(Reference(ws, min_col=1, min_row=2, max_row=last))
line.width, line.height = 18, 10
ws.add_chart(line, "F22")
def main() -> None:
parser = argparse.ArgumentParser(description="Write formulas and charts to Excel.")
parser.add_argument("--input", default="sample_sales.xlsx",
help="Source .xlsx file (created if absent)")
parser.add_argument("--output", default=None,
help="Destination path (default: overwrite input)")
args = parser.parse_args()
src = Path(args.input)
dest = Path(args.output) if args.output else src
if not src.exists():
print(f"Creating sample workbook at {src}")
build_sample_workbook(src)
try:
wb = openpyxl.load_workbook(src)
except Exception as exc:
raise SystemExit(f"Cannot open {src}: {exc}") from exc
ws = wb["Sales"]
apply_formulas(ws)
apply_named_ranges(wb, ws)
style_headers(ws)
add_charts(ws)
wb.save(dest)
print(f"Written: {dest}")
if __name__ == "__main__":
main()
Related
- Fix openpyxl Formulas Showing as Blank — when formulas return None on read-back
- Automating Excel Report Generation — scheduling and batch report workflows
- Reading Excel Files with Python — loading .xlsx data back into pandas
- Automating Monthly Sales Reports in Excel — end-to-end sales report automation