diff --git a/frappe/tests/test_pdf.py b/frappe/tests/test_pdf.py
index fb68cdffe4..4ae8e8ff3c 100644
--- a/frappe/tests/test_pdf.py
+++ b/frappe/tests/test_pdf.py
@@ -37,9 +37,32 @@ class TestPdf(FrappeTestCase):
def test_read_options_from_html(self):
_, html_options = pdfgen.read_options_from_html(self.html)
self.assertTrue(html_options["margin-top"] == "0")
- self.assertTrue(html_options["margin-left"] == "10")
+ self.assertTrue(html_options["margin-left"] == "10mm")
self.assertTrue(html_options["margin-right"] == "0")
+ html_1 = """
+
Hello
+ """
+ _, options = pdfgen.read_options_from_html(html_1)
+
+ self.assertTrue(options["margin-top"] == "0")
+ self.assertTrue(options["margin-left"] == "10mm")
+ self.assertTrue(options["margin-bottom"] == "20mm")
+ # margin-right was for .more-info (child of .print-format)
+ # so it should not be extracted into options
+ self.assertFalse(options.get("margin-right"))
+
def test_pdf_encryption(self):
password = "qwe"
pdf = pdfgen.get_pdf(self.html, options={"password": password})
diff --git a/frappe/utils/pdf.py b/frappe/utils/pdf.py
index bed32987d1..7325f87201 100644
--- a/frappe/utils/pdf.py
+++ b/frappe/utils/pdf.py
@@ -5,10 +5,10 @@ import contextlib
import io
import mimetypes
import os
-import re
import subprocess
from urllib.parse import parse_qs, urlparse
+import cssutils
import pdfkit
from bs4 import BeautifulSoup
from packaging.version import Version
@@ -206,8 +206,9 @@ def read_options_from_html(html):
toggle_visible_pdf(soup)
- # use regex instead of soup-parser
- for attr in (
+ valid_styles = get_print_format_styles(soup)
+
+ attrs = (
"margin-top",
"margin-bottom",
"margin-left",
@@ -217,18 +218,49 @@ def read_options_from_html(html):
"orientation",
"page-width",
"page-height",
- ):
- try:
- pattern = re.compile(r"(\.print-format)([\S|\s][^}]*?)(" + str(attr) + r":)(.+)(mm;)")
- match = pattern.findall(html)
- if match:
- options[attr] = str(match[-1][3]).strip()
- except Exception:
- pass
-
+ )
+ options |= {style.name: style.value for style in valid_styles if style.name in attrs}
return str(soup), options
+def get_print_format_styles(soup: BeautifulSoup) -> list[cssutils.css.Property]:
+    """
+    Collect the CSS declarations that target the '.print-format' class itself.
+
+    The text of every <style> tag in ``soup`` is joined and parsed with cssutils.
+    A rule counts when one of its comma-separated selectors is exactly
+    '.print-format':
+    1) .print-format { ... }
+    2) .print-format, p { ... } | p, .print-format { ... }
+
+    Rules that style descendants of '.print-format' are skipped:
+    1) .print-format p { ... } | .print-format > p { ... }
+    2) .print-format #abc { ... }
+
+    Returns:
+        [cssutils.css.Property(name='margin-top', value='50mm', priority=''), ...]
+    """
+    # Tag.string is None for an empty <style> tag (or one with several child
+    # nodes); treat that as "no CSS" instead of failing on `str + None`.
+    stylesheet = "".join(style_tag.string or "" for style_tag in soup.find_all("style"))
+
+    # Use css parser to tokenize the classes and their styles
+    parsed_sheet = cssutils.parseString(stylesheet)
+
+    # Get all styles that are only for .print-format
+    valid_styles = []
+    for rule in parsed_sheet:
+        if not isinstance(rule, cssutils.css.CSSStyleRule):
+            continue
+
+        # Allow only .print-format { ... } and .print-format, p { ... }
+        # Disallow .print-format p { ... } and .print-format > p { ... }
+        if ".print-format" in [x.strip() for x in rule.selectorText.split(",")]:
+            valid_styles.extend(rule.style)
+
+    return valid_styles
+
+
def inline_private_images(html) -> str:
soup = BeautifulSoup(html, "html.parser")
for img in soup.find_all("img"):
diff --git a/pyproject.toml b/pyproject.toml
index 2e2a7948e6..14289d1a72 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
"chardet~=5.1.0",
"croniter~=2.0.1",
"cryptography~=42.0.0",
+ "cssutils~=2.9.0",
"email-reply-parser~=0.5.12",
"gunicorn~=21.2.0",
"html5lib~=1.1",