Watermarking and Securing PDFs
Visual watermarks handle branding and confidentiality labelling. Cryptographic controls handle access enforcement. Both belong at the end of any Automating PDF Extraction & Generation pipeline — after structural work such as Merging and Splitting PDF Documents is complete, and before the output reaches a recipient. Applying encryption mid-pipeline breaks merge operations and parsing steps; applying watermarks after encryption requires decryption first. Get the order right and both techniques compose cleanly.
This guide covers: generating ReportLab overlay templates, stamping pages with merge_page(), AES-256 encryption with writer.encrypt(), owner vs user passwords, permission flag bitmasks, and batch processing patterns.
Prerequisites
# pip install pypdf reportlab
pip install "pypdf>=3.17" "reportlab>=4.2"
You need at least one source PDF for testing. A minimal one-page file is sufficient for all examples here. Store it at ./input/source.pdf or adjust the path constants in the snippets.
Diagnostic Step: Inspect the PDF Before Applying Security
Before applying watermarks or encryption, verify the file's current state: is it already encrypted, what page size does it use, and does it contain form fields that watermarking might break?
# pip install pypdf
from pathlib import Path

from pypdf import PdfReader
from pypdf.errors import FileNotDecryptedError

# PDF to inspect; adjust the path to point at your own test file.
SOURCE = Path("./input/source.pdf")

try:
    reader = PdfReader(SOURCE)
    # Report the encryption state first: on a password-protected file the
    # page accesses below raise, so this line must not depend on them.
    print(f"Encrypted : {reader.is_encrypted}")
    print(f"Pages : {len(reader.pages)}")
    # MediaBox gives the page dimensions in points (1 pt = 1/72 inch)
    media_box = reader.pages[0].mediabox
    width_pt = float(media_box.width)
    height_pt = float(media_box.height)
    print(f"Page width : {width_pt:.1f} pt ({width_pt / 72:.2f} in)")
    print(f"Page height : {height_pt:.1f} pt ({height_pt / 72:.2f} in)")
    # Index the trailer with [] (not .get) so the membership test runs
    # against the resolved catalog dictionary rather than a raw reference.
    catalog = reader.trailer["/Root"]
    print(f"Has AcroForm: {'/AcroForm' in catalog}")
except FileNotFoundError:
    print(f"File not found: {SOURCE}")
except FileNotDecryptedError:
    # The file needs a password before pages can be read.
    print(f"Password required: decrypt {SOURCE} before inspecting its pages")
If is_encrypted is True, decrypt before watermarking — see Remove a Password from a PDF with Python. If the page is letter-sized (612 × 792 pt), the snippets below work without modification. For A4 (595 × 842 pt), swap the pagesize constant in the ReportLab call.
Security Layers: How Watermarking and Encryption Compose
Step 1: Generate a Watermark Template with ReportLab
ReportLab produces a single-page PDF with transparent text or graphics. This file is reused across every page in a batch — generate it once, open it once.
# pip install reportlab
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
# Location the generated single-page watermark template is written to.
WATERMARK_PATH = Path("./tmp/watermark_template.pdf")
def create_text_watermark(
    text: str = "CONFIDENTIAL",
    pagesize: tuple = letter,  # swap to A4 for European documents
    alpha: float = 0.25,  # 0.1 = very faint, 0.5 = noticeable
    font_size: int = 52,
) -> Path:
    """Render a diagonal text watermark page and return its path.

    The page is written to WATERMARK_PATH; the parent directory is created
    when missing.

    Args:
        text: String drawn across the page.
        pagesize: (width, height) of the page in points.
        alpha: Fill opacity applied to the text.
        font_size: Point size of the Helvetica-Bold text.

    Returns:
        WATERMARK_PATH, the file that was written.
    """
    target = WATERMARK_PATH
    target.parent.mkdir(parents=True, exist_ok=True)

    page_w, page_h = pagesize
    sheet = canvas.Canvas(str(target), pagesize=pagesize)

    sheet.saveState()
    # Shift the origin to the middle of the page and tilt the axes by 45°,
    # so a string centred on (0, 0) runs diagonally through the page centre.
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(alpha)
    sheet.setFont("Helvetica-Bold", font_size)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    return target
Keep alpha between 0.1 and 0.4. Above 0.4, the overlay obscures body text on documents with light backgrounds.
Step 2: Stamp Pages with merge_page()
PageObject.merge_page() composites the watermark PDF page on top of each content page using PDF transparency semantics. The watermark layer is stamped over the content; use page.merge_page(watermark, over=False) to push it under the content instead (useful for background logos).
# pip install pypdf reportlab
from pathlib import Path
from pypdf import PdfReader, PdfWriter
# Content PDF that receives the watermark.
INPUT_PDF = Path("./input/source.pdf")
# Destination of the stamped copy.
OUTPUT_PDF = Path("./output/watermarked.pdf")
# Single-page overlay PDF to stamp onto every page.
WATERMARK = Path("./tmp/watermark_template.pdf")
def stamp_watermark(
    source: Path,
    watermark: Path,
    output: Path,
    under: bool = False,  # True = watermark behind content (background mode)
) -> None:
    """Merge a watermark template onto every page of a source PDF.

    Args:
        source: PDF whose pages are stamped.
        watermark: PDF whose first page is used as the overlay.
        output: Destination file; its parent directory is created if missing.
        under: When True the watermark is composited beneath the page
            content; when False (default) it is stamped on top.

    Failures are reported on stdout rather than raised, so a caller looping
    over many files is not interrupted by one bad input.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Parse the watermark once; the same page object is reused for
        # every content page in both modes.
        wm_reader = PdfReader(watermark)
        wm_page = wm_reader.pages[0]
        reader = PdfReader(source)
        writer = PdfWriter()
        for page in reader.pages:
            # Always merge INTO the content page so the output keeps the
            # content page's own MediaBox; `over` only selects whether the
            # watermark lands above or below the existing content.
            page.merge_page(wm_page, over=not under)
            writer.add_page(page)
        with open(output, "wb") as fh:
            writer.write(fh)
        print(f"Watermarked: {output}")
    except FileNotFoundError as exc:
        print(f"Missing file: {exc}")
    except Exception as exc:
        print(f"Watermark failed: {exc}")
# Script entry point: stamp the default input with the default template.
if __name__ == "__main__":
    stamp_watermark(
        source=INPUT_PDF,
        watermark=WATERMARK,
        output=OUTPUT_PDF,
    )
Step 3: Encrypt with AES-256 and Set Permission Flags
PdfWriter.encrypt() accepts a permissions_flag integer built from the pypdf.constants.UserAccessPermissions flags (or a raw bitmask). The owner password bypasses all restrictions; the user password enforces them.
# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter
# pypdf exposes the permission bits as the UserAccessPermissions IntFlag in
# pypdf.constants; pypdf.generic has no PermissionFlags class.
from pypdf.constants import UserAccessPermissions

# Watermarked input produced by the stamping step, and the encrypted result.
INPUT_PDF = Path("./output/watermarked.pdf")
SECURED_PDF = Path("./output/secured.pdf")

# Bitmask: allow printing and annotations, deny content copy and modification
PERMISSIONS = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)
def encrypt_pdf(
    source: Path,
    output: Path,
    user_password: str,
    owner_password: str,
    permissions: int = PERMISSIONS,
    algorithm: str = "AES-256",
) -> None:
    """Copy every page of *source* into an encrypted PDF at *output*.

    Args:
        source: PDF to protect.
        output: Destination file; its parent directory is created if missing.
        user_password: Required to open/view the document.
        owner_password: Grants full rights, overrides permission flags.
        permissions: Bitmask passed to the writer as ``permissions_flag``.
        algorithm: Encryption algorithm name handed to the writer.

    Errors are printed instead of propagated.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        src_doc = PdfReader(source)
        protected = PdfWriter()
        for src_page in src_doc.pages:
            protected.add_page(src_page)

        protected.encrypt(
            user_password=user_password,
            owner_password=owner_password,
            permissions_flag=permissions,
            algorithm=algorithm,  # "AES-256" → PDF 2.0 compliant
        )

        with open(output, "wb") as sink:
            protected.write(sink)
        print(f"Encrypted ({algorithm}): {output}")
    except FileNotFoundError as exc:
        print(f"Source not found: {exc}")
    except Exception as exc:
        print(f"Encryption failed: {exc}")
# Script entry point: encrypt the watermarked file with secrets taken from
# the environment.
if __name__ == "__main__":
    import os

    # never hardcode — a missing variable raises KeyError here
    viewer_secret = os.environ["PDF_USER_PW"]
    admin_secret = os.environ["PDF_OWNER_PW"]
    encrypt_pdf(
        INPUT_PDF,
        SECURED_PDF,
        user_password=viewer_secret,
        owner_password=admin_secret,
    )
Never hardcode passwords in source files. Pull them from environment variables or a secrets manager (AWS Secrets Manager, HashiCorp Vault, or even a local .env excluded from version control).
Owner vs User Password
| Password type | Who holds it | What it unlocks |
|---|---|---|
| User password | End recipient | Open and view (within permission flags) |
| Owner password | Document author / admin | All operations; overrides every permission flag |
| No user password | — | File opens without password; flags still apply to non-owner opens |
Setting user_password="" (empty string) leaves the document openable by anyone while still enforcing permission flags and requiring the owner password for editing. This is a common pattern for read-only distribution.
Permission Flags Reference
# The permission bits live in pypdf.constants as the UserAccessPermissions
# IntFlag; members can be OR-ed together and passed as permissions_flag.
from pypdf.constants import UserAccessPermissions

# Common flag combinations
READ_ONLY = UserAccessPermissions(0)  # deny everything
PRINT_ONLY = UserAccessPermissions.PRINT
ANNOTATE = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)
FULL_EDIT = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.MODIFY
    | UserAccessPermissions.EXTRACT
    | UserAccessPermissions.ADD_OR_MODIFY
    | UserAccessPermissions.FILL_FORM_FIELDS
    | UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS
    | UserAccessPermissions.ASSEMBLE_DOC
    | UserAccessPermissions.PRINT_TO_REPRESENTATION
)
Edge Cases and Variants
Variant 1: Watermark + Encrypt in One Pass (in-memory)
Avoid writing an intermediate file by streaming through io.BytesIO. This matters when generating PDF reports dynamically and piping output directly to a secured response:
# pip install pypdf reportlab
import io, os
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
def watermark_then_encrypt(
    source_path: Path,
    output_path: Path,
    wm_text: str,
    user_pw: str,
    owner_pw: str,
) -> None:
    """Watermark and encrypt a PDF without writing an intermediate file.

    Args:
        source_path: PDF to process.
        output_path: Destination file; its parent directory is created
            just before writing.
        wm_text: Text drawn diagonally across each page.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.
    """
    # 1. Render the letter-sized overlay straight into memory.
    overlay_bytes = io.BytesIO()
    page_w, page_h = letter
    sheet = canvas.Canvas(overlay_bytes, pagesize=letter)
    sheet.saveState()
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(0.25)
    sheet.setFont("Helvetica-Bold", 52)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, wm_text)
    sheet.restoreState()
    sheet.save()
    overlay_bytes.seek(0)  # rewind so the reader starts at the PDF header

    # 2. Stamp the overlay on every source page.
    overlay = PdfReader(overlay_bytes).pages[0]
    src_doc = PdfReader(source_path)
    result = PdfWriter()
    for src_page in src_doc.pages:
        src_page.merge_page(overlay)
        result.add_page(src_page)

    # 3. Encrypt the in-memory document, then persist it.
    result.encrypt(
        user_password=user_pw,
        owner_password=owner_pw,
        algorithm="AES-256",
    )
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as sink:
        result.write(sink)
    print(f"Done: {output_path}")
Variant 2: RC4 Legacy Mode for Older Readers
Some embedded systems (older MFP scanners, kiosk PDF viewers) reject AES-256. Fall back to RC4-128 only when interoperability requires it — RC4 is cryptographically weak and should not be used for compliance:
# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter
def encrypt_rc4_legacy(source: Path, output: Path, user_pw: str, owner_pw: str) -> None:
    """RC4-128 for legacy reader compatibility only — not for compliance use.

    Args:
        source: PDF to protect.
        output: Destination file; its parent directory is created if missing.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.
    """
    # Create the destination directory up front so the final write cannot
    # fail on a missing folder after all the work is done.
    output.parent.mkdir(parents=True, exist_ok=True)
    reader = PdfReader(source)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    # "RC4-128" is accepted by pypdf but generates a PDF 1.4-compatible dict
    writer.encrypt(user_password=user_pw, owner_password=owner_pw, algorithm="RC4-128")
    with open(output, "wb") as fh:
        writer.write(fh)
Variant 3: Image Watermark (logo stamp)
For brand logos, draw a scaled image instead of text in the ReportLab canvas:
# pip install reportlab Pillow
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
# Default logo image drawn onto the watermark page.
LOGO = Path("./assets/logo.png")
# Default destination of the generated logo watermark PDF.
WM = Path("./tmp/logo_watermark.pdf")
def create_image_watermark(logo: Path = LOGO, output: Path = WM, alpha: float = 0.2) -> None:
    """Write a letter-sized single-page PDF with *logo* centred on it.

    Args:
        logo: Image file to draw.
        output: Destination PDF; its parent directory is created if missing.
        alpha: Fill opacity set on the canvas before the image is drawn.
    """
    # The default output lives under ./tmp, which may not exist yet.
    output.parent.mkdir(parents=True, exist_ok=True)
    width, height = letter
    c = canvas.Canvas(str(output), pagesize=letter)
    c.saveState()
    c.setFillAlpha(alpha)
    # Centre the logo; adjust width/height as needed
    logo_w, logo_h = 200, 80
    c.drawImage(
        ImageReader(str(logo)),
        (width - logo_w) / 2,
        (height - logo_h) / 2,
        width=logo_w,
        height=logo_h,
        mask="auto",  # honour PNG transparency
    )
    c.restoreState()
    c.save()
Validation
After applying watermark and encryption, verify both programmatically before delivery:
# pip install pypdf
from pathlib import Path
from pypdf import PdfReader
def validate_secured_pdf(path: Path, user_pw: str, expected_pages: int) -> bool:
    """Check that *path* is encrypted, opens with *user_pw*, and has the right length.

    Args:
        path: PDF to verify.
        user_pw: Password used for the decryption attempt.
        expected_pages: Page count the decrypted document must have.

    Returns:
        True when every check passes; False otherwise. The outcome is also
        printed, and any exception is reported as an error and yields False.
    """
    try:
        pdf = PdfReader(path)
        problem = None
        if not pdf.is_encrypted:
            problem = f"FAIL: {path.name} is not encrypted"
        elif pdf.decrypt(user_pw) == 0:
            # decrypt() yields 0 when the password is rejected
            problem = f"FAIL: wrong password for {path.name}"
        else:
            page_count = len(pdf.pages)
            if page_count != expected_pages:
                problem = f"FAIL: expected {expected_pages} pages, got {page_count}"

        if problem is not None:
            print(problem)
            return False

        print(f"PASS: {path.name} — encrypted, {page_count} pages")
        return True
    except Exception as exc:
        print(f"ERROR: {exc}")
        return False
reader.decrypt() returns 0 on failure, 1 for user-password success, and 2 for owner-password success.
Performance and Scale Notes
- Generate the watermark template once per batch, not once per file. A ReportLab canvas render takes ~5–10 ms; multiplied across thousands of documents it adds up.
- Open the watermark reader once per batch and reuse the page object — PdfReader is not expensive to keep open.
- Use multiprocessing for large batches. pypdf is pure Python and CPU-bound, so threads are serialised by the GIL; concurrent.futures.ProcessPoolExecutor with max_workers=os.cpu_count() gives near-linear throughput gains.
- Memory ceiling. Each PdfReader/PdfWriter pair holds the full page tree in memory. For files over ~200 MB, stream with pypdf's clone_reader_document_root or process in chunks.
Troubleshooting
| Error | Root cause | Fix |
|---|---|---|
| PdfReadError: Stream has not been decrypted | Trying to read pages from an encrypted file without calling decrypt() | Call reader.decrypt(password) immediately after opening; check the return value |
| NotImplementedError: Encryption algorithm not supported | Using PyPDF2 (unmaintained) or a version of pypdf older than 3.0 | pip install "pypdf>=3.17" and remove PyPDF2 from requirements.txt |
| Watermark text missing from output | merge_page called but transparency not set; canvas not saved before .save() | Verify c.saveState() / c.restoreState() wrap the drawing calls; check setFillAlpha value |
| Permission flags ignored by Adobe Acrobat | File encrypted with user/owner password the same value | Always set owner and user passwords to different values; identical passwords disable flag enforcement in some readers |
| FileNotDecryptedError on decrypt | Wrong password passed to reader.decrypt() | Catch pypdf.errors.FileNotDecryptedError; surface a clear message rather than swallowing the exception |
Complete Working Script
# pip install pypdf reportlab
"""
secure_pdfs.py — watermark and encrypt all PDFs in a directory.
Usage:
PDF_USER_PW=viewer123 PDF_OWNER_PW=admin456 python secure_pdfs.py \
--input ./raw --output ./secured --text "CONFIDENTIAL"
"""
import argparse
import io
import os
from pathlib import Path
from pypdf import PdfReader, PdfWriter
# pypdf exposes the permission bits as the UserAccessPermissions IntFlag in
# pypdf.constants; pypdf.generic has no PermissionFlags class.
from pypdf.constants import UserAccessPermissions
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Recipients may print and annotate; all other operations stay denied.
PERMISSIONS = UserAccessPermissions.PRINT | UserAccessPermissions.ADD_OR_MODIFY
def build_watermark_buffer(text: str, alpha: float = 0.25) -> io.BytesIO:
    """Render a letter-sized diagonal text watermark into memory.

    Args:
        text: String drawn across the page.
        alpha: Fill opacity applied to the text.

    Returns:
        A BytesIO holding the one-page PDF, rewound to position 0.
    """
    pdf_bytes = io.BytesIO()
    page_w, page_h = letter
    sheet = canvas.Canvas(pdf_bytes, pagesize=letter)

    sheet.saveState()
    # Centre the origin and rotate so the text runs corner to corner.
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(alpha)
    sheet.setFont("Helvetica-Bold", 52)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()
    sheet.save()

    pdf_bytes.seek(0)  # rewind so a reader starts at the PDF header
    return pdf_bytes
def process_file(
    source: Path,
    output: Path,
    wm_page,  # pre-loaded watermark page object
    user_pw: str,
    owner_pw: str,
) -> bool:
    """Watermark and AES-256-encrypt one PDF.

    Args:
        source: PDF to process.
        output: Destination file.
        wm_page: Watermark page merged onto every source page.
        user_pw: Password required to open the result.
        owner_pw: Password granting full rights on the result.

    Returns:
        True when the secured file was written; False when any step raised,
        in which case the file is reported as skipped.
    """
    try:
        src_doc = PdfReader(source)
        secured = PdfWriter()
        for src_page in src_doc.pages:
            src_page.merge_page(wm_page)
            secured.add_page(src_page)

        secured.encrypt(
            user_password=user_pw,
            owner_password=owner_pw,
            permissions_flag=PERMISSIONS,
            algorithm="AES-256",
        )

        with open(output, "wb") as sink:
            secured.write(sink)
    except Exception as exc:
        print(f"  SKIP {source.name}: {exc}")
        return False

    print(f"  secured: {output.name}")
    return True
def main() -> None:
    """Parse CLI options and secure every ``*.pdf`` in the input directory.

    Passwords come from the PDF_USER_PW (optional, defaults to empty) and
    PDF_OWNER_PW (required) environment variables. Exits with a message
    when the owner password is not set.
    """
    cli = argparse.ArgumentParser(description="Watermark and encrypt PDFs")
    cli.add_argument("--input", type=Path, default=Path("./input"), help="Source directory")
    cli.add_argument("--output", type=Path, default=Path("./output"), help="Output directory")
    cli.add_argument("--text", default="CONFIDENTIAL", help="Watermark text")
    opts = cli.parse_args()

    user_pw = os.environ.get("PDF_USER_PW", "")
    owner_pw = os.environ.get("PDF_OWNER_PW")
    if not owner_pw:
        raise SystemExit("Set PDF_OWNER_PW environment variable")

    opts.output.mkdir(parents=True, exist_ok=True)

    # Build watermark once for the entire batch
    overlay_bytes = build_watermark_buffer(opts.text)
    overlay = PdfReader(overlay_bytes).pages[0]

    sources = sorted(opts.input.glob("*.pdf"))
    print(f"Processing {len(sources)} file(s) from {opts.input}")

    succeeded = 0
    for src in sources:
        destination = opts.output / f"secure_{src.name}"
        if process_file(src, destination, overlay, user_pw, owner_pw):
            succeeded += 1

    print(f"Done: {succeeded}/{len(sources)} succeeded")
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Related
- Add Password Protection to PDF Files — detailed AES-256 encryption workflow with validation
- Remove a Password from a PDF with Python — decrypt an authorized PDF and save an unencrypted copy
- Merging and Splitting PDF Documents — complete structural edits before applying security
- Generating PDF Reports Dynamically — pipe generated output directly into the watermark+encrypt step