Merge pull request #25659 from marination/wkhtml-options
fix: Use CssParser to correctly pass options to wkhtmltopdf
This commit is contained in:
commit
722bb3b0fc
3 changed files with 69 additions and 13 deletions
|
|
@ -37,9 +37,32 @@ class TestPdf(FrappeTestCase):
|
|||
def test_read_options_from_html(self):
    """Check which CSS declarations `read_options_from_html` turns into options.

    Covers two inputs: the shared `self.html` fixture, and an inline
    stylesheet mixing rules that target `.print-format` itself with a rule
    that targets one of its descendants.
    """
    _, html_options = pdfgen.read_options_from_html(self.html)
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertEqual(html_options["margin-top"], "0")
    # Non-zero lengths keep their unit; the earlier `== "10"` expectation
    # contradicted this assertion and could never pass alongside it.
    self.assertEqual(html_options["margin-left"], "10mm")
    self.assertEqual(html_options["margin-right"], "0")

    html_1 = """<style>
.print-format {
margin-top: 0mm;
margin-left: 10mm;
}
.print-format .more-info {
margin-right: 15mm;
}
.print-format, .more-info {
margin-bottom: 20mm;
}
</style>
<div class="more-info">Hello</div>
"""
    _, options = pdfgen.read_options_from_html(html_1)

    self.assertEqual(options["margin-top"], "0")
    self.assertEqual(options["margin-left"], "10mm")
    self.assertEqual(options["margin-bottom"], "20mm")
    # margin-right was for .more-info (child of .print-format)
    # so it should not be extracted into options
    self.assertFalse(options.get("margin-right"))
|
||||
|
||||
def test_pdf_encryption(self):
|
||||
password = "qwe"
|
||||
pdf = pdfgen.get_pdf(self.html, options={"password": password})
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@ import contextlib
|
|||
import io
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import cssutils
|
||||
import pdfkit
|
||||
from bs4 import BeautifulSoup
|
||||
from packaging.version import Version
|
||||
|
|
@ -206,8 +206,9 @@ def read_options_from_html(html):
|
|||
|
||||
toggle_visible_pdf(soup)
|
||||
|
||||
# use regex instead of soup-parser
|
||||
for attr in (
|
||||
valid_styles = get_print_format_styles(soup)
|
||||
|
||||
attrs = (
|
||||
"margin-top",
|
||||
"margin-bottom",
|
||||
"margin-left",
|
||||
|
|
@ -217,18 +218,49 @@ def read_options_from_html(html):
|
|||
"orientation",
|
||||
"page-width",
|
||||
"page-height",
|
||||
):
|
||||
try:
|
||||
pattern = re.compile(r"(\.print-format)([\S|\s][^}]*?)(" + str(attr) + r":)(.+)(mm;)")
|
||||
match = pattern.findall(html)
|
||||
if match:
|
||||
options[attr] = str(match[-1][3]).strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
)
|
||||
options |= {style.name: style.value for style in valid_styles if style.name in attrs}
|
||||
return str(soup), options
|
||||
|
||||
|
||||
def get_print_format_styles(soup: BeautifulSoup) -> list[cssutils.css.Property]:
    """
    Get styles purely on class 'print-format'.

    Valid:
    1) .print-format { ... }
    2) .print-format, p { ... } | p, .print-format { ... }

    Invalid (applied on child elements):
    1) .print-format p { ... } | .print-format > p { ... }
    2) .print-format #abc { ... }

    Args:
        soup: Parsed document whose <style> tags are inspected.

    Returns:
        [cssutils.css.Property(name='margin-top', value='50mm', priority=''), ...]
    """
    # Prepare a css stylesheet from all the style tags' contents.
    # `Tag.string` is None for an empty <style> tag (or one holding several
    # child nodes); fall back to "" so such a tag cannot raise TypeError.
    stylesheet = "".join(style_tag.string or "" for style_tag in soup.find_all("style"))

    # Use css parser to tokenize the classes and their styles
    parsed_sheet = cssutils.parseString(stylesheet)

    # Get all styles that are only for .print-format
    valid_styles = []
    for rule in parsed_sheet:
        # Skip comments, @media, @import, ... -- only plain style rules
        # carry a selector list and a declaration block.
        if not isinstance(rule, cssutils.css.CSSStyleRule):
            continue

        # Allow only .print-format { ... } and .print-format, p { ... }
        # Disallow .print-format p { ... } and .print-format > p { ... }
        selectors = [selector.strip() for selector in rule.selectorText.split(",")]
        if ".print-format" in selectors:
            valid_styles.extend(rule.style)

    return valid_styles
|
||||
|
||||
|
||||
def inline_private_images(html) -> str:
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
for img in soup.find_all("img"):
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ dependencies = [
|
|||
"chardet~=5.1.0",
|
||||
"croniter~=2.0.1",
|
||||
"cryptography~=42.0.0",
|
||||
"cssutils~=2.9.0",
|
||||
"email-reply-parser~=0.5.12",
|
||||
"gunicorn~=21.2.0",
|
||||
"html5lib~=1.1",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue