Watermarking and Securing PDFs

Visual watermarks handle branding and confidentiality labelling. Cryptographic controls handle access enforcement. Both belong at the end of any Automating PDF Extraction & Generation pipeline — after structural work such as Merging and Splitting PDF Documents is complete, and before the output reaches a recipient. Applying encryption mid-pipeline breaks merge operations and parsing steps; applying watermarks after encryption requires decryption first. Get the order right and both techniques compose cleanly.

This guide covers: generating ReportLab overlay templates, stamping pages with merge_page(), AES-256 encryption with writer.encrypt(), owner vs user passwords, permission flag bitmasks, and batch processing patterns.

Prerequisites

# pip install pypdf reportlab
pip install "pypdf[crypto]>=3.17" "reportlab>=4.2"   # the crypto extra installs the AES backend that AES-256 encryption needs

You need at least one source PDF for testing. A minimal one-page file is sufficient for all examples here. Store it at ./input/source.pdf or adjust the path constants in the snippets.

Diagnostic Step: Inspect the PDF Before Applying Security

Before applying watermarks or encryption, verify the file's current state: is it already encrypted, what page size does it use, and does it contain form fields that watermarking might break?

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader

SOURCE = Path("./input/source.pdf")

# Report the basic facts that decide how the later steps must be configured:
# page count, encryption state, page size and presence of form fields.
try:
    reader = PdfReader(SOURCE)
    # Page access raises on a locked file, so the encryption state has to be
    # settled before touching reader.pages. decrypt("") returns a falsy value
    # when an actual password is required and succeeds for files that only
    # restrict permissions (empty user password).
    if reader.is_encrypted and not reader.decrypt(""):
        print(f"Encrypted   : {reader.is_encrypted}")
        print("Password required: decrypt the file before inspecting pages")
    else:
        page = reader.pages[0]
        # MediaBox gives the page dimensions in points (1 pt = 1/72 inch)
        media_box = page.mediabox
        width_pt = float(media_box.width)
        height_pt = float(media_box.height)
        # Index the trailer instead of calling .get(): item access resolves an
        # indirect /Root reference to the catalog dictionary, which is what
        # the membership test below needs.
        catalog = reader.trailer["/Root"]
        print(f"Pages       : {len(reader.pages)}")
        print(f"Encrypted   : {reader.is_encrypted}")
        print(f"Page width  : {width_pt:.1f} pt  ({width_pt / 72:.2f} in)")
        print(f"Page height : {height_pt:.1f} pt  ({height_pt / 72:.2f} in)")
        print(f"Has AcroForm: {'/AcroForm' in catalog}")
except FileNotFoundError:
    print(f"File not found: {SOURCE}")

If is_encrypted is True, decrypt before watermarking — see Remove a Password from a PDF with Python. If the page is letter-sized (612 × 792 pt), the snippets below work without modification. For A4 (595 × 842 pt), swap the pagesize constant in the ReportLab call.

Security Layers: How Watermarking and Encryption Compose

Figure: PDF watermark and encryption pipeline. Shows the three-stage pipeline: the source PDF and the ReportLab watermark template merge into a watermarked PDF, which then passes through pypdf encrypt() to produce the final secured PDF with user password, owner password, and permission flags. Stage 1 (Overlay): Source PDF (unprotected) + ReportLab watermark.pdf → merge_page() → PdfWriter. Stage 2 (Encrypt): Watermarked PDF (no lock) → encrypt() with AES-256. Output: Secured PDF (user password, owner password, permission flags). Permission flags shown: print_printing, modify_content, copy_content, add_or_modify_annotations.

Step 1: Generate a Watermark Template with ReportLab

ReportLab produces a single-page PDF with transparent text or graphics. This file is reused across every page in a batch — generate it once, open it once.

# pip install reportlab
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4

WATERMARK_PATH = Path("./tmp/watermark_template.pdf")


def create_text_watermark(
    text: str = "CONFIDENTIAL",
    pagesize: tuple = letter,   # swap to A4 for European documents
    alpha: float = 0.25,        # 0.1 = very faint, 0.5 = noticeable
    font_size: int = 52,
) -> Path:
    """Write a one-page PDF holding *text* as a rotated, translucent stamp.

    The page is created with the given ``pagesize``; the text is drawn in
    grey Helvetica-Bold at ``font_size`` with fill opacity ``alpha``, rotated
    45 degrees about the page centre. The file always goes to WATERMARK_PATH
    (its parent directory is created if missing) and that path is returned.
    """
    target = WATERMARK_PATH
    target.parent.mkdir(parents=True, exist_ok=True)

    page_w, page_h = pagesize
    sheet = canvas.Canvas(str(target), pagesize=pagesize)

    sheet.saveState()
    # Put the origin in the middle of the page and tilt the axes, so the
    # string below can simply be centred on (0, 0).
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(alpha)
    sheet.setFont("Helvetica-Bold", font_size)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    return target

Keep alpha between 0.1 and 0.4. Above 0.4, the overlay obscures body text on documents with light backgrounds.

Step 2: Stamp Pages with merge_page()

PageObject.merge_page() composites the watermark PDF page onto each content page using PDF transparency semantics. By default the watermark layer is stamped over the content; call page.merge_page(watermark_page, over=False) to push it under the content instead (useful for background logos).

# pip install pypdf reportlab
from pathlib import Path
from pypdf import PdfReader, PdfWriter

# Default locations consumed by the __main__ demo at the end of this snippet.
INPUT_PDF  = Path("./input/source.pdf")  # document to be stamped
OUTPUT_PDF = Path("./output/watermarked.pdf")  # where the stamped copy is written
WATERMARK  = Path("./tmp/watermark_template.pdf")  # single-page overlay template


def stamp_watermark(
    source: Path,
    watermark: Path,
    output: Path,
    under: bool = False,     # True = watermark behind content (background mode)
) -> None:
    """Merge the first page of a watermark PDF onto every page of a source PDF.

    Args:
        source: PDF whose pages receive the watermark.
        watermark: PDF whose first page is used as the overlay template.
        output: Destination file; its parent directory is created if missing.
        under: When True the watermark is drawn beneath the page content,
            otherwise on top of it.

    Failures are reported on stdout instead of being raised, so a batch
    caller can carry on with the next file.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Parse the template once; the same page object serves every merge.
        wm_page = PdfReader(watermark).pages[0]

        reader = PdfReader(source)
        writer = PdfWriter()

        for page in reader.pages:
            # Always merge INTO the content page so the output keeps that
            # page's own boxes and annotations. over=False asks pypdf to
            # place the merged content below the existing content.
            page.merge_page(wm_page, over=not under)
            writer.add_page(page)

        with open(output, "wb") as fh:
            writer.write(fh)
        print(f"Watermarked: {output}")
    except FileNotFoundError as exc:
        print(f"Missing file: {exc}")
    except Exception as exc:
        print(f"Watermark failed: {exc}")


# Demo entry point: stamp the default source with the default template,
# watermark on top of the content (under is left at its default).
if __name__ == "__main__":
    stamp_watermark(INPUT_PDF, WATERMARK, OUTPUT_PDF)

Step 3: Encrypt with AES-256 and Set Permission Flags

PdfWriter.encrypt() accepts a permissions_flag integer built from permission-bit constants (pypdf defines them as the pypdf.constants.UserAccessPermissions IntFlag) or from a raw bitmask. The owner password bypasses all restrictions; the user password opens the document with the restrictions enforced.

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.constants import UserAccessPermissions

INPUT_PDF   = Path("./output/watermarked.pdf")
SECURED_PDF = Path("./output/secured.pdf")

# Bitmask: allow printing and annotations, deny content copy and modification.
# pypdf exposes the permission bits as the UserAccessPermissions IntFlag
# (pypdf.constants); a set bit grants the corresponding permission.
PERMISSIONS = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)


def encrypt_pdf(
    source: Path,
    output: Path,
    user_password: str,
    owner_password: str,
    permissions: int = PERMISSIONS,
    algorithm: str = "AES-256",
) -> None:
    """
    Copy every page of source into a new writer, encrypt it, save to output.

    user_password  — required to open/view the document
    owner_password — grants full rights, overrides permission flags
    permissions    — bitmask handed to pypdf as permissions_flag
    algorithm      — algorithm name handed to pypdf unchanged

    The output directory is created when missing. Errors are printed, not
    raised.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    try:
        pdf_in = PdfReader(source)
        pdf_out = PdfWriter()
        for leaf in pdf_in.pages:
            pdf_out.add_page(leaf)

        # Encryption settings are registered on the writer first and take
        # effect when the document is serialised below.
        pdf_out.encrypt(
            user_password=user_password,
            owner_password=owner_password,
            permissions_flag=permissions,
            algorithm=algorithm,      # "AES-256" → PDF 2.0 compliant
        )

        with open(output, "wb") as sink:
            pdf_out.write(sink)
        print(f"Encrypted ({algorithm}): {output}")
    except FileNotFoundError as exc:
        print(f"Source not found: {exc}")
    except Exception as exc:
        print(f"Encryption failed: {exc}")


if __name__ == "__main__":
    import os
    # os.environ[...] raises KeyError when a variable is unset, so the demo
    # stops before encrypting anything if either password is missing.
    encrypt_pdf(
        INPUT_PDF,
        SECURED_PDF,
        user_password=os.environ["PDF_USER_PW"],    # never hardcode
        owner_password=os.environ["PDF_OWNER_PW"],
    )

Never hardcode passwords in source files. Pull them from environment variables or a secrets manager (AWS Secrets Manager, HashiCorp Vault, or even a local .env excluded from version control).

Owner vs User Password

Password type | Who holds it | What it unlocks
User password | End recipient | Open and view (within permission flags)
Owner password | Document author / admin | All operations; overrides every permission flag
No user password | — | File opens without password; flags still apply to non-owner opens

Setting user_password="" (empty string) leaves the document openable by anyone while still enforcing permission flags and requiring the owner password for editing. This is a common pattern for read-only distribution.

Permission Flags Reference

from pypdf.constants import UserAccessPermissions

# Common flag combinations (a set bit grants the permission)
READ_ONLY  = 0                           # deny everything
PRINT_ONLY = UserAccessPermissions.PRINT
ANNOTATE   = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.ADD_OR_MODIFY
)
FULL_EDIT  = (
    UserAccessPermissions.PRINT
    | UserAccessPermissions.MODIFY
    | UserAccessPermissions.EXTRACT
    | UserAccessPermissions.ADD_OR_MODIFY
    | UserAccessPermissions.FILL_FORM_FIELDS
    | UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS
    | UserAccessPermissions.ASSEMBLE_DOC
    | UserAccessPermissions.PRINT_TO_REPRESENTATION
)

Edge Cases and Variants

Variant 1: Watermark + Encrypt in One Pass (in-memory)

Avoid writing an intermediate file by streaming through io.BytesIO. This matters when generating PDF reports dynamically and piping output directly to a secured response:

# pip install pypdf reportlab
import io, os
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter


def watermark_then_encrypt(
    source_path: Path,
    output_path: Path,
    wm_text: str,
    user_pw: str,
    owner_pw: str,
) -> None:
    """Stamp and encrypt a PDF without writing any intermediate file.

    A letter-size overlay carrying ``wm_text`` is rendered into memory,
    merged on top of every page of ``source_path``, and the result is saved
    AES-256 encrypted to ``output_path`` (parent directory created when
    missing). No permissions_flag is passed to encrypt(), so the library's
    default permission set applies. Exceptions propagate to the caller.
    """
    # Stage 1: render the overlay into an in-memory buffer.
    overlay_bytes = io.BytesIO()
    page_w, page_h = letter
    pen = canvas.Canvas(overlay_bytes, pagesize=letter)
    pen.saveState()
    pen.translate(page_w / 2, page_h / 2)
    pen.rotate(45)
    pen.setFillAlpha(0.25)
    pen.setFont("Helvetica-Bold", 52)
    pen.setFillColorRGB(0.3, 0.3, 0.3)
    pen.drawCentredString(0, 0, wm_text)
    pen.restoreState()
    pen.save()
    overlay_bytes.seek(0)  # rewind so the reader starts at the PDF header

    # Stage 2: lay the overlay over each source page.
    overlay = PdfReader(overlay_bytes).pages[0]
    document = PdfReader(source_path)
    result = PdfWriter()
    for sheet in document.pages:
        sheet.merge_page(overlay)
        result.add_page(sheet)

    # Stage 3: encrypt the merged pages directly on the same writer.
    result.encrypt(
        user_password=user_pw,
        owner_password=owner_pw,
        algorithm="AES-256",
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "wb") as sink:
        result.write(sink)
    print(f"Done: {output_path}")

Variant 2: RC4 Legacy Mode for Older Readers

Some embedded systems (older MFP scanners, kiosk PDF viewers) reject AES-256. Fall back to RC4-128 only when interoperability requires it — RC4 is cryptographically weak and should not be used for compliance:

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader, PdfWriter

def encrypt_rc4_legacy(source: Path, output: Path, user_pw: str, owner_pw: str) -> None:
    """RC4-128 for legacy reader compatibility only — not for compliance use.

    Copies every page of ``source`` into a new writer, encrypts it with
    RC4-128 and saves it to ``output``. The output directory is created when
    missing. Exceptions (missing source, unreadable PDF) propagate to the
    caller.
    """
    # Create the destination folder up front, as encrypt_pdf() does, so the
    # final write cannot fail on a missing directory after the work is done.
    output.parent.mkdir(parents=True, exist_ok=True)
    reader = PdfReader(source)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    # "RC4-128" is accepted by pypdf but generates a PDF 1.4-compatible dict
    writer.encrypt(user_password=user_pw, owner_password=owner_pw, algorithm="RC4-128")
    with open(output, "wb") as fh:
        writer.write(fh)

Variant 3: Image Watermark (logo stamp)

For brand logos, draw a scaled image instead of text in the ReportLab canvas:

# pip install reportlab Pillow
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader

LOGO = Path("./assets/logo.png")
WM   = Path("./tmp/logo_watermark.pdf")


def create_image_watermark(logo: Path = LOGO, output: Path = WM, alpha: float = 0.2) -> None:
    """Write a one-page letter-size PDF with ``logo`` centred on it.

    Args:
        logo: Image file to place on the page.
        output: Destination PDF; its parent directory is created if missing.
        alpha: Fill opacity set on the canvas before the image is drawn.

    The logo is drawn into a fixed 200 x 80 pt box centred on the page.
    """
    # The canvas writes to `output` on save(); make sure the folder exists,
    # as create_text_watermark() does for its own target.
    output.parent.mkdir(parents=True, exist_ok=True)
    width, height = letter
    c = canvas.Canvas(str(output), pagesize=letter)
    c.saveState()
    c.setFillAlpha(alpha)
    # Centre the logo; adjust width/height as needed
    logo_w, logo_h = 200, 80
    c.drawImage(
        ImageReader(str(logo)),
        (width - logo_w) / 2,
        (height - logo_h) / 2,
        width=logo_w,
        height=logo_h,
        mask="auto",        # honour PNG transparency
    )
    c.restoreState()
    c.save()

Validation

After applying watermark and encryption, verify both programmatically before delivery:

# pip install pypdf
from pathlib import Path
from pypdf import PdfReader

def validate_secured_pdf(path: Path, user_pw: str, expected_pages: int) -> bool:
    """Check that *path* is encrypted, opens with *user_pw*, and has the right page count.

    Returns True only when all three checks pass. Each failed check prints a
    FAIL line and returns False; any exception raised along the way is
    printed as an ERROR line and also yields False.
    """
    try:
        pdf = PdfReader(path)

        # Run the checks in order and keep the first failure message, if any.
        problem = None
        if not pdf.is_encrypted:
            problem = f"FAIL: {path.name} is not encrypted"
        elif pdf.decrypt(user_pw) == 0:
            problem = f"FAIL: wrong password for {path.name}"
        else:
            page_total = len(pdf.pages)
            if page_total != expected_pages:
                problem = f"FAIL: expected {expected_pages} pages, got {page_total}"

        if problem is not None:
            print(problem)
            return False

        print(f"PASS: {path.name} — encrypted, {page_total} pages")
        return True
    except Exception as exc:
        print(f"ERROR: {exc}")
        return False

reader.decrypt() returns 0 on failure, 1 for user-password success, and 2 for owner-password success.

Performance and Scale Notes

  • Generate the watermark template once per batch, not once per file. A ReportLab canvas render takes ~5–10 ms; multiplied across thousands of documents it adds up.
  • Open the watermark reader once per batch and reuse the page object — keeping a PdfReader open is inexpensive.
  • Use multiprocessing for large batches. pypdf is pure Python and CPU-bound, so threads are serialised by the GIL; concurrent.futures.ProcessPoolExecutor with max_workers=os.cpu_count() sidesteps it and scales roughly with the number of cores.
  • Memory ceiling. Each PdfReader/PdfWriter pair holds the full page tree in memory. For files over ~200 MB, stream with pypdf's clone_reader_document_root or process in chunks.

Troubleshooting

Error | Root cause | Fix
PdfReadError: Stream has not been decrypted | Trying to read pages from an encrypted file without calling decrypt() | Call reader.decrypt(password) immediately after opening; check the return value
NotImplementedError: Encryption algorithm not supported | Using PyPDF2 (unmaintained) or a version of pypdf older than 3.0 | pip install "pypdf>=3.17" and remove PyPDF2 from requirements.txt
Watermark text missing from output | merge_page called but transparency not set; canvas state not restored before .save() | Verify c.saveState() / c.restoreState() wrap the drawing calls; check setFillAlpha value
Permission flags ignored by Adobe Acrobat | File encrypted with the same value for the user and owner passwords | Always set owner and user passwords to different values; identical passwords disable flag enforcement in some readers
FileNotDecryptedError on decrypt | Wrong password passed to reader.decrypt() | Catch pypdf.errors.FileNotDecryptedError; surface a clear message rather than swallowing the exception

Complete Working Script

# pip install pypdf reportlab
"""
secure_pdfs.py — watermark and encrypt all PDFs in a directory.

Usage:
    PDF_USER_PW=viewer123 PDF_OWNER_PW=admin456 python secure_pdfs.py \
        --input ./raw --output ./secured --text "CONFIDENTIAL"
"""
import argparse
import io
import os
from pathlib import Path

from pypdf import PdfReader, PdfWriter
from pypdf.constants import UserAccessPermissions
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Allow printing and annotations only. pypdf exposes the permission bits as
# the UserAccessPermissions IntFlag (pypdf.constants).
PERMISSIONS = UserAccessPermissions.PRINT | UserAccessPermissions.ADD_OR_MODIFY


def build_watermark_buffer(text: str, alpha: float = 0.25) -> io.BytesIO:
    """Render *text* as a diagonal letter-size watermark PDF held in memory.

    The text is drawn in grey 52 pt Helvetica-Bold with fill opacity
    ``alpha``, rotated 45 degrees about the page centre. The returned buffer
    is rewound to offset 0, ready to be read from the start.
    """
    stream = io.BytesIO()
    page_w, page_h = letter
    sheet = canvas.Canvas(stream, pagesize=letter)

    sheet.saveState()
    # Origin to page centre, then tilt, so the string is centred on (0, 0).
    sheet.translate(page_w / 2, page_h / 2)
    sheet.rotate(45)
    sheet.setFillAlpha(alpha)
    sheet.setFont("Helvetica-Bold", 52)
    sheet.setFillColorRGB(0.3, 0.3, 0.3)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    stream.seek(0)
    return stream


def process_file(
    source: Path,
    output: Path,
    wm_page,          # pre-loaded watermark page object
    user_pw: str,
    owner_pw: str,
) -> bool:
    """Watermark and encrypt one PDF; report success as a boolean.

    ``wm_page`` is merged onto every page of ``source``, the result is
    encrypted with AES-256 and the module-level PERMISSIONS mask, then saved
    to ``output``. Any exception is printed as a SKIP line and turned into a
    False return so the surrounding batch keeps going.
    """
    try:
        original = PdfReader(source)
        secured = PdfWriter()
        for sheet in original.pages:
            sheet.merge_page(wm_page)
            secured.add_page(sheet)

        encrypt_options = {
            "user_password": user_pw,
            "owner_password": owner_pw,
            "permissions_flag": PERMISSIONS,
            "algorithm": "AES-256",
        }
        secured.encrypt(**encrypt_options)

        with open(output, "wb") as sink:
            secured.write(sink)
        print(f"  secured: {output.name}")
        return True
    except Exception as exc:
        print(f"  SKIP {source.name}: {exc}")
        return False


def main() -> None:
    """Command-line entry point: watermark and encrypt every *.pdf in a folder.

    Reads --input, --output and --text from the command line and the
    passwords from PDF_USER_PW (optional, defaults to empty) and
    PDF_OWNER_PW (required; exits when unset or empty). Each result is
    written to the output directory as ``secure_<original name>``.
    """
    cli = argparse.ArgumentParser(description="Watermark and encrypt PDFs")
    cli.add_argument("--input",  type=Path, default=Path("./input"),  help="Source directory")
    cli.add_argument("--output", type=Path, default=Path("./output"), help="Output directory")
    cli.add_argument("--text",   default="CONFIDENTIAL",               help="Watermark text")
    opts = cli.parse_args()

    user_pw = os.environ.get("PDF_USER_PW", "")
    owner_pw = os.environ.get("PDF_OWNER_PW")
    if not owner_pw:
        raise SystemExit("Set PDF_OWNER_PW environment variable")

    opts.output.mkdir(parents=True, exist_ok=True)

    # One overlay for the whole batch: render it once, parse it once.
    overlay_buf = build_watermark_buffer(opts.text)
    overlay_page = PdfReader(overlay_buf).pages[0]

    sources = sorted(opts.input.glob("*.pdf"))
    print(f"Processing {len(sources)} file(s) from {opts.input}")

    succeeded = 0
    for pdf_path in sources:
        destination = opts.output / f"secure_{pdf_path.name}"
        if process_file(pdf_path, destination, overlay_page, user_pw, owner_pw):
            succeeded += 1
    print(f"Done: {succeeded}/{len(sources)} succeeded")


# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Part of Automating PDF Extraction & Generation.

Explore next