Inserting Images into Word Documents
Adding an image to a Word document sounds trivial until you hit the defaults: python-docx renders images at their native pixel size, which maps to an unpredictable physical size depending on the file's DPI metadata. A 300 DPI PNG intended for print comes out tiny; a 72 DPI screenshot fills three pages. This guide covers every insertion pattern you actually encounter — sizing, aspect ratio, placement in tables and headers/footers, captions, BytesIO sources, and batch folder insertion — so you produce predictable output every time.
The techniques here build on Automating Word Document Creation and are frequently combined with Dynamic Mail Merge with Python when logos or signature images vary per recipient.
Prerequisites
pip install python-docx Pillow
python-docx handles .docx manipulation; Pillow is needed only for the aspect-ratio calculation helper and for loading images from bytes. A test image is assumed at assets/logo.png.
# pip install python-docx Pillow
from pathlib import Path
from docx import Document
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Shared locations for the examples: source images and generated documents.
ASSETS, OUTPUT = Path("assets"), Path("output")

# Create the output folder up front; an already-existing folder is not an error.
OUTPUT.mkdir(exist_ok=True)
1. Inspect the Image Before Inserting
Before writing any insertion code, confirm the image dimensions and DPI. This tells you whether you need to supply an explicit size (you almost always do).
# pip install Pillow
from PIL import Image
from pathlib import Path
# Report the pixel size, DPI and resulting physical size of one image.
img_path = Path("assets/logo.png")
DEFAULT_DPI = 72  # resolution assumed when the file has no usable DPI metadata

try:
    # Keep the try body to the calls that actually touch the file, so an
    # arithmetic problem is never reported as a failure to open the image.
    with Image.open(img_path) as img:
        width_px, height_px = img.size
        dpi = img.info.get("dpi", (DEFAULT_DPI, DEFAULT_DPI))
except FileNotFoundError:
    print("Image not found — check the path")
except Exception as e:
    print(f"Could not open image: {e}")
else:
    # A zero or negative resolution would make the divisions below fail (or
    # produce a nonsensical size), so treat it the same as missing metadata.
    dpi_x, dpi_y = (
        float(value) if value and value > 0 else float(DEFAULT_DPI)
        for value in dpi[:2]
    )
    width_in = width_px / dpi_x
    height_in = height_px / dpi_y
    print(f"Size: {width_px}x{height_px}px @ {dpi_x:g} DPI")
    print(f"Native physical size: {width_in:.2f} x {height_in:.2f} inches")
If the native physical size is wildly wrong for your document layout, you need to pass an explicit width or height argument to add_picture(). See Fix Images Too Large in python-docx for the full diagnosis.
2. Basic Insertion with Explicit Width
Document.add_picture(image_path_or_stream, width, height) — supply exactly one of width or height and python-docx preserves the aspect ratio automatically.
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.shared import Inches
# Insert one image at a fixed width and save the document.
IMAGE = Path("assets/logo.png")
OUTPUT = Path("output/basic_image.docx")

doc = Document()
doc.add_heading("Report Header", level=1)
doc.add_paragraph("This report includes a company logo.")

# Create the destination folder first: otherwise doc.save() raises
# FileNotFoundError for the *output* path, which is easy to mistake for a
# missing image.
OUTPUT.parent.mkdir(parents=True, exist_ok=True)

try:
    # Only width is given, so the height is scaled proportionally.
    doc.add_picture(str(IMAGE), width=Inches(2.0))
except FileNotFoundError:
    print(f"Image not found: {IMAGE}")
except Exception as e:
    print(f"Insertion failed: {e}")
else:
    # Saving is handled separately so its errors are reported as save errors.
    try:
        doc.save(str(OUTPUT))
    except OSError as e:
        print(f"Save failed: {e}")
    else:
        print(f"Saved: {OUTPUT}")
Passing only width=Inches(2.0) scales the height proportionally. Passing both width and height overrides the aspect ratio — use that only when you deliberately want distortion (for example, a fixed-size icon cell in a table).
3. Fitting an Image to Page Width
For full-width images, compute the usable page width from the section margins rather than hardcoding inches. This adapts automatically if the template uses non-standard margins.
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.shared import Emu, Inches

# Insert an image scaled to the usable width between the page margins.
REPORT_IMAGE = Path("assets/chart.png")
OUTPUT = Path("output/full_width_image.docx")

doc = Document()

# Read page geometry from the first section
section = doc.sections[0]
page_width = section.page_width
left_margin = section.left_margin
right_margin = section.right_margin
# Subtracting Length values yields a plain int, which has no .inches
# attribute; wrap the result in Emu so the unit helpers stay available for
# the status message below.
usable_width = Emu(page_width - left_margin - right_margin)

doc.add_heading("Monthly Sales Chart", level=2)

# Create the destination folder so a missing directory is not reported as a
# missing image by the FileNotFoundError handler below.
OUTPUT.parent.mkdir(parents=True, exist_ok=True)

try:
    doc.add_picture(str(REPORT_IMAGE), width=usable_width)
except FileNotFoundError:
    print(f"Image not found: {REPORT_IMAGE}")
except Exception as e:
    print(f"Could not insert image: {e}")
else:
    # Saving is handled separately so its errors are reported as save errors.
    try:
        doc.save(str(OUTPUT))
    except OSError as e:
        print(f"Save failed: {e}")
    else:
        print(f"Image width: {usable_width.inches:.2f} in — saved {OUTPUT}")
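section.page_width, section.left_margin, and section.right_margin are all in EMU (English Metric Units). python-docx arithmetic works directly in EMU — passing usable_width as the width argument is valid without unit conversion. Note that subtracting these values returns a plain int rather than a Length, so wrap the result in Emu(...) if you need unit properties such as .inches afterwards.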
4. Placing Images in Tables
Table cells are the most reliable way to position images side-by-side or to create a fixed-size image grid. Constrain the image to something narrower than the cell to leave interior padding.
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Place two images side by side using a one-row, two-column table.
LOGO = Path("assets/logo.png")
CHART = Path("assets/chart.png")
OUTPUT = Path("output/table_images.docx")

doc = Document()
doc.add_heading("Product Overview", level=1)

table = doc.add_table(rows=1, cols=2)
table.style = "Table Grid"

# (column index, image file, text shown in the cell when the file is absent)
slots = (
    (0, LOGO, "[logo missing]"),
    (1, CHART, "[chart missing]"),
)

for column, image_file, missing_text in slots:
    cell = table.cell(0, column)
    try:
        # Pictures in a cell are added through a run of the cell's paragraph.
        paragraph = cell.paragraphs[0]
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.add_run().add_picture(str(image_file), width=Inches(2.5))
    except FileNotFoundError:
        cell.text = missing_text

try:
    doc.save(str(OUTPUT))
except Exception as e:
    print(f"Save failed: {e}")
else:
    print(f"Saved: {OUTPUT}")
Note that add_picture inside a table cell is called on a Run, not on the document directly. Access it via paragraph.add_run().add_picture(...).
5. Inserting Images into Headers and Footers
Headers and footers are separate _HeaderFooter objects. Access the paragraph inside them and insert via a run, exactly as you would in a table cell.
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Put a logo in the page header and a smaller one after the footer text.
LOGO = Path("assets/logo.png")
OUTPUT = Path("output/header_logo.docx")

doc = Document()
first_section = doc.sections[0]

# -- Header: right-aligned logo, plain text if the file is absent --
head_par = first_section.header.paragraphs[0]
head_par.alignment = WD_ALIGN_PARAGRAPH.RIGHT
logo_run = head_par.add_run()
try:
    logo_run.add_picture(str(LOGO), width=Inches(1.2))
except FileNotFoundError:
    head_par.text = "Logo missing"

# -- Footer: fixed text followed by a small logo (or a text stand-in) --
foot_par = first_section.footer.paragraphs[0]
foot_par.text = "Confidential — "
mark_run = foot_par.add_run()
try:
    mark_run.add_picture(str(LOGO), width=Inches(0.6))
except FileNotFoundError:
    mark_run.text = "[logo]"

doc.add_paragraph("Document body text here.")

try:
    doc.save(str(OUTPUT))
except Exception as e:
    print(f"Save failed: {e}")
else:
    print(f"Saved: {OUTPUT}")
If the document uses different first-page headers, set section.different_first_page_header_footer = True and populate section.first_page_header separately.
6. Inline vs Floating Images
python-docx inserts images as inline shapes only — they flow with the text. Floating images (text wrap around an image) require raw OOXML manipulation because the python-docx API does not expose them.
For the majority of automated reporting workflows, inline is preferable: it is deterministic, survives re-flowing, and requires no XML patching. If you need floating images (e.g., for magazine-style layouts), embed a pre-positioned placeholder in the template and replace it via docxtpl rather than injecting via python-docx.
7. Adding Captions
python-docx does not have a dedicated add_caption() method. The standard approach is to add a paragraph immediately after the picture paragraph and style it as Caption.
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Insert an image followed by a centred paragraph in the "Caption" style.
IMAGE = Path("assets/chart.png")
OUTPUT = Path("output/captioned_image.docx")

doc = Document()
doc.add_heading("Quarterly Results", level=2)

# Create the destination folder first: otherwise doc.save() raises
# FileNotFoundError for the *output* path, which the handler below would
# report as a missing image.
OUTPUT.parent.mkdir(parents=True, exist_ok=True)

try:
    doc.add_picture(str(IMAGE), width=Inches(4.0))
    # Caption immediately follows the picture paragraph
    caption = doc.add_paragraph("Figure 1 — Q3 revenue by product line", style="Caption")
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
except FileNotFoundError:
    print(f"Image not found: {IMAGE}")
except Exception as e:
    print(f"Error: {e}")
else:
    # Saving is handled separately so its errors are reported as save errors.
    try:
        doc.save(str(OUTPUT))
    except OSError as e:
        print(f"Save failed: {e}")
    else:
        print(f"Saved: {OUTPUT}")
The Caption style must exist in the document's style set. It is present in all default python-docx documents. If you open a stripped template that lacks it, add doc.styles.add_style("Caption", WD_STYLE_TYPE.PARAGRAPH) first.
8. Inserting Images from Bytes / BytesIO
When images come from a database, S3, or an in-memory generation pipeline (e.g., a matplotlib chart), avoid writing to disk — pass a BytesIO object directly.
# pip install python-docx matplotlib
import io
from pathlib import Path
import matplotlib.pyplot as plt
from docx import Document
from docx.shared import Inches
# Render a matplotlib chart into memory and embed it without touching disk.
OUTPUT = Path("output/chart_from_bytes.docx")

# Generate a chart in memory
fig, ax = plt.subplots(figsize=(6, 3))
ax.bar(["Jan", "Feb", "Mar"], [120, 145, 98])
ax.set_title("Monthly Units")

buf = io.BytesIO()
fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
plt.close(fig)  # release the figure once its pixels are in the buffer
buf.seek(0)  # rewind so the reader starts at the first byte

doc = Document()
doc.add_heading("Sales Report", level=1)

# Create the destination folder so a missing directory does not surface as
# a chart-insertion failure.
OUTPUT.parent.mkdir(parents=True, exist_ok=True)

try:
    doc.add_picture(buf, width=Inches(5.0))
except Exception as e:
    print(f"Failed to insert chart: {e}")
else:
    # Saving is handled separately so its errors are reported as save errors.
    try:
        doc.save(str(OUTPUT))
    except OSError as e:
        print(f"Save failed: {e}")
    else:
        print(f"Saved: {OUTPUT}")
buf.seek(0) is mandatory before passing to add_picture — otherwise the read position is at the end of the buffer and python-docx reads zero bytes.
9. Batch Logo/Image Insertion from a Folder
A common pattern: each row in a dataset maps to a product image file. Iterate the records, resolve the image path, fall back to a placeholder if the file is missing.
# pip install python-docx pandas
import pandas as pd
from pathlib import Path
from docx import Document
from docx.shared import Inches
# Generate one product sheet per CSV row, with the row's image or a placeholder.
IMAGES_DIR = Path("assets/products")
DATA_FILE = Path("data/products.csv")
OUTPUT_DIR = Path("output/product_sheets")
PLACEHOLDER = Path("assets/placeholder.png")

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

try:
    # Read every column as text and keep blank cells as "" instead of NaN.
    # Default type inference would turn a SKU such as "00123" into 123 (or
    # 123.0 when the column has gaps), breaking the image lookup, and blank
    # cells would be written into the document as the text "nan".
    df = pd.read_csv(DATA_FILE, dtype=str, keep_default_na=False)
except FileNotFoundError:
    raise SystemExit(f"Data file not found: {DATA_FILE}")

generated = 0
for idx, row in df.iterrows():
    # Fall back to a row-based name when the SKU column is absent or blank.
    sku = row.get("sku") or f"row_{idx}"

    doc = Document()
    doc.add_heading(row.get("product_name") or "Product", level=1)
    doc.add_paragraph(row.get("description") or "")

    img_path = IMAGES_DIR / f"{sku}.png"
    if not img_path.exists():
        img_path = PLACEHOLDER

    try:
        doc.add_picture(str(img_path), width=Inches(3.0))
    except Exception as e:
        # Best effort: keep the sheet and note why the image is absent.
        doc.add_paragraph(f"[Image unavailable: {e}]")

    out_file = OUTPUT_DIR / f"{sku}.docx"
    try:
        doc.save(str(out_file))
    except Exception as e:
        print(f"Could not save {out_file}: {e}")
    else:
        generated += 1

# Report the number of files actually written, not the number of rows.
print(f"Generated {generated} product sheets in {OUTPUT_DIR}")
Edge Cases & Variants
High-DPI Images from Design Tools
Designers export assets at 300 DPI for print. At native size, a 1200 x 900 px / 300 DPI image is 4 x 3 inches — fine for full-page layouts but too large for a logo slot. Always pass an explicit width argument. See Fix Images Too Large in python-docx for a helper that calculates the scaled size automatically.
Inserting Charts Generated by openpyxl
When your pipeline uses openpyxl to generate Excel charts, export each chart as an image via matplotlib or save the entire sheet view as PNG, then insert via BytesIO as shown in section 8.
Transparent PNG Backgrounds
python-docx preserves PNG transparency. The image will display with a transparent background in Word's default white document view. On colored section backgrounds, this renders correctly — no pre-flattening required.
Validation
After generating, confirm the image dimensions are within expected ranges:
# pip install python-docx
from pathlib import Path
from docx import Document
from docx.opc.exceptions import PackageNotFoundError
from docx.shared import Inches

# Check that every inline image in a generated document fits a letter page.
GENERATED = Path("output/basic_image.docx")
MAX_WIDTH_IN = 8.5  # width of a US Letter page in inches
EMU_PER_INCH = 914400

try:
    doc = Document(str(GENERATED))
except (FileNotFoundError, PackageNotFoundError):
    # python-docx signals a missing or unreadable package with its own
    # PackageNotFoundError, so catching FileNotFoundError alone is not enough.
    print(f"Document not found: {GENERATED}")
else:
    shapes = doc.inline_shapes
    print(f"Inline shapes found: {len(shapes)}")

    too_wide = []
    for i, shape in enumerate(shapes):
        w_in = shape.width / EMU_PER_INCH  # EMU to inches
        h_in = shape.height / EMU_PER_INCH
        print(f"  Shape {i}: {w_in:.2f} x {h_in:.2f} inches")
        # Explicit check instead of assert: asserts are stripped under -O,
        # and collecting the failures reports every offender, not just the first.
        if w_in > MAX_WIDTH_IN:
            too_wide.append(f"Shape {i} wider than letter page")

    for problem in too_wide:
        print(f"Validation failed: {problem}")
One EMU = 1/914400 of an inch. shape.width and shape.height are always in EMU.
Performance & Scale Notes
- `add_picture` reads the entire image file each call. For batch runs inserting the same logo in thousands of documents, read the image bytes once into a `BytesIO`, then `seek(0)` before each insertion — this avoids repeated disk I/O.
- python-docx holds the entire document XML in memory. For large batches, instantiate a new `Document()` per output file rather than reusing a single instance; accumulated runs fragment the paragraph tree.
- Very large images (>10 MB) embedded in `.docx` files bloat file sizes significantly. Resize to the intended display size using Pillow before insertion — aim for ~150 DPI at the intended display dimensions.
Troubleshooting
| Error | Root cause | Fix |
|---|---|---|
| PackageNotFoundError on add_picture | Path string is wrong or file does not exist | Use Path.exists() before calling; pass str(path) not a Path object on older python-docx versions |
| Image appears as a broken icon in Word | BytesIO position not reset to 0 | Call buf.seek(0) immediately before add_picture(buf, ...) |
| Image fills entire page / is huge | No width argument passed; python-docx used native EMU size | Always pass width=Inches(n) or width=usable_width — see fix guide |
| KeyError: 'Caption' on add_paragraph(style="Caption") | Template lacks the Caption style | Use a standard Document() baseline, or add the style manually before use |
| Image inserted in wrong location | doc.add_picture() always appends to the document body | To insert mid-document, add a paragraph at the target position and call run.add_picture() on that paragraph's run |
Complete Working Script
#!/usr/bin/env python3
# pip install python-docx Pillow pandas
"""
batch_image_report.py — insert per-row images from a CSV manifest
into individual .docx files, fitting each image to the usable page width.
Usage: python batch_image_report.py --data products.csv --images assets/ --out output/
"""
import argparse
import io
from pathlib import Path
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import pandas as pd
from PIL import Image as PILImage
def fit_width(img_path: Path, max_width_emu: int) -> int:
    """Return a display width in EMU for the image, capped at max_width_emu.

    The native width is the pixel width divided by the horizontal DPI stored
    in the file. Only the width is returned; the caller is expected to pass
    it as the sole size argument so the height scales proportionally.

    Args:
        img_path: Image file to measure.
        max_width_emu: Upper bound for the returned width, in EMU.

    Returns:
        The smaller of the native width and max_width_emu. If the image
        cannot be read or measured, the smaller of 4 inches and
        max_width_emu.
    """
    emu_per_inch = 914400
    # 72 DPI matches the default used when inspecting images elsewhere in
    # this guide; a different default here would shrink images that carry
    # no DPI metadata relative to their native insertion size.
    default_dpi = 72
    fallback_emu = 4 * emu_per_inch  # same value as int(Inches(4))

    try:
        with PILImage.open(img_path) as img:
            width_px = img.size[0]
            dpi_x = img.info.get("dpi", (default_dpi, default_dpi))[0]
        # Zero or negative metadata would break the division below.
        if not dpi_x or dpi_x <= 0:
            dpi_x = default_dpi
        native_emu = int(width_px / dpi_x * emu_per_inch)
    except Exception:
        # Deliberate best effort: any failure to measure yields the fallback.
        return min(fallback_emu, max_width_emu)
    return min(native_emu, max_width_emu)
def _cell_text(row: dict, key: str, default: str = "") -> str:
    """Return row[key] as text, or default when the cell is missing, NaN or empty."""
    value = row.get(key)
    # Blank CSV cells arrive from pandas as NaN, which is truthy and would
    # otherwise be written into the document as the literal text "nan".
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return default
    return str(value) or default


def make_report(row: dict, images_dir: Path, output_path: Path, placeholder: Path) -> None:
    """Build and save a one-page report for a single data row.

    Args:
        row: Mapping of column name to cell value. Reads the keys "name",
            "description", "notes" and "sku"; all are optional.
        images_dir: Directory searched for "<sku>.png".
        output_path: Destination .docx file.
        placeholder: Image used when "<sku>.png" does not exist.

    Raises:
        Whatever doc.save() raises; image problems are recorded in the
        document body instead of being raised.
    """
    doc = Document()
    section = doc.sections[0]
    # Width available between the margins, in EMU.
    usable = section.page_width - section.left_margin - section.right_margin

    name = _cell_text(row, "name")
    doc.add_heading(name or "Report", level=1)
    for key in ("description", "notes"):
        body = _cell_text(row, key)
        if body:
            doc.add_paragraph(body)

    img_path = images_dir / f"{_cell_text(row, 'sku', 'unknown')}.png"
    if not img_path.exists():
        img_path = placeholder

    try:
        target_width = fit_width(img_path, usable)
        doc.add_picture(str(img_path), width=target_width)
        caption = doc.add_paragraph(f"Figure — {name}", style="Caption")
        caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    except Exception as e:
        # Best effort: still produce the report, noting why the image is absent.
        doc.add_paragraph(f"[Image error: {e}]")

    doc.save(str(output_path))
def main() -> None:
    """Parse the command line and generate one report per CSV row.

    Exits with a message if the data file is missing or empty. Failures on
    individual rows are printed and counted; they do not stop the batch.
    """
    parser = argparse.ArgumentParser(description="Batch image report generator")
    parser.add_argument("--data", required=True, help="Path to CSV file")
    parser.add_argument("--images", required=True, help="Directory containing SKU PNG files")
    parser.add_argument("--out", required=True, help="Output directory")
    parser.add_argument("--placeholder", default="assets/placeholder.png")
    args = parser.parse_args()

    data_path = Path(args.data)
    images_dir = Path(args.images)
    output_dir = Path(args.out)
    placeholder = Path(args.placeholder)
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Read every column as text and keep blank cells as "". Default type
        # inference would turn a SKU such as "00123" into 123 (or 123.0 when
        # the column has gaps) and blank cells into NaN, producing wrong
        # file names and the text "nan" in the reports.
        df = pd.read_csv(data_path, dtype=str, keep_default_na=False)
    except FileNotFoundError:
        raise SystemExit(f"Data file not found: {data_path}")
    except pd.errors.EmptyDataError:
        raise SystemExit(f"Data file is empty: {data_path}")

    ok, fail = 0, 0
    for idx, row in df.iterrows():
        # Name the file after the SKU, or after the row when the SKU is
        # absent or blank, so such rows do not all collide on one file name.
        stem = row.get("sku") or f"row_{idx}"
        out_file = output_dir / f"{stem}.docx"
        try:
            make_report(row.to_dict(), images_dir, out_file, placeholder)
            ok += 1
        except Exception as e:
            # Top-level boundary: report the row and continue with the batch.
            print(f"FAIL {out_file.name}: {e}")
            fail += 1

    print(f"Done: {ok} generated, {fail} failed → {output_dir}")


if __name__ == "__main__":
    main()
Related
- Fix Images Too Large in python-docx — fix the default native-size overflow problem
- Automating Word Document Creation — full document generation pipeline with styles and tables
- Dynamic Mail Merge with Python — per-recipient templating where each output may carry a different logo or signature image