seitime-frappe/frappe/utils/pdf_generator/page.py
Corentin Forler cb8ac9b14f
fix: Get host URL when printing outside of request
Signed-off-by: Akhil Narang <me@akhilnarang.dev>
2025-11-14 12:52:29 +05:30

357 lines
12 KiB
Python

import base64
import time
import urllib
import urllib.parse

import frappe
from frappe.utils.pdf import get_host_url
"""
CDP commands documentation can be found here.
https://chromedevtools.github.io/devtools-protocol/
"""
class Page:
    """One browser tab (CDP target) driven through a shared DevTools session.

    The constructor creates a blank target, attaches to it in flattened mode
    and keeps the resulting session id; page-scoped commands go through
    :meth:`send`, which tags them with that id.

    NOTE(review): ``self.options`` and ``self.is_print_designer`` are read by
    ``add_page_size_css`` / ``generate_pdf`` / ``get_element_height`` but are
    never assigned in this class -- presumably the caller sets them on the
    instance before use; confirm against the call sites.
    """

    def __init__(self, session, browser_context_id, page_type):
        """Create and attach to a new target, then prepare it for printing.

        Args:
            session: object exposing ``send``, ``start_listener``,
                ``wait_for_event`` and ``remove_listener``.
            browser_context_id: passed as ``browserContextId`` to
                ``Target.createTarget``.
            page_type: stored verbatim on ``self.type``.

        Raises:
            RuntimeError: if the target cannot be created or attached to.
        """
        self.session = session
        self.type = page_type

        result, error = self.session.send(
            "Target.createTarget", {"url": "", "browserContextId": browser_context_id}
        )
        if error:
            frappe.log_error(title="Error creating new page:", message=f"{error}")
            # Without a target there is nothing to attach to; fail here instead
            # of failing on the ``result["targetId"]`` lookup below.
            raise RuntimeError(f"Error creating new page: {error}")
        self.target_id = result["targetId"]

        attached = self._unwrap(
            self.session.send("Target.attachToTarget", {"targetId": self.target_id, "flatten": True}),
            "Error attaching to target",
        )
        self.session_id = attached["sessionId"]

        self.send("Page.enable")
        self.frame_id = None
        self.get_frame_id_on_demand()
        self.set_media_emulation("print")
        self.set_cookies()

    @staticmethod
    def _unwrap(response, failure):
        """Return the result of a ``(result, error)`` pair or raise.

        Raises:
            RuntimeError: ``"<failure>: <error>"`` when ``error`` is truthy.
        """
        result, error = response
        if error:
            raise RuntimeError(f"{failure}: {error}")
        return result

    # TODO: make send to return future and don't wait for it by default.
    def send(self, method, params=None, return_future=False):
        """Send a CDP command on this page's attached session.

        ``params`` defaults to an empty dict; the return value is whatever
        ``session.send`` returns.
        """
        payload = {} if params is None else params
        return self.session.send(method, payload, self.session_id, return_future)

    def get_frame_id_on_demand(self):
        """Return the frame id, fetching it via ``Page.getFrameTree`` once.

        Raises:
            RuntimeError: if the command reports an error. Any failure is
                logged through ``frappe.log_error`` before being re-raised.
        """
        if self.frame_id:
            return self.frame_id
        try:
            tree = self._unwrap(self.send("Page.getFrameTree"), "Error fetching frameId")
            self.frame_id = tree["frameTree"]["frame"]["id"]
            return self.frame_id
        except Exception:
            frappe.log_error(title="Error fetching frameId:", message=f"{frappe.get_traceback()}")
            raise

    def _ensure_frame_id(self):
        """Return the frame id, resolving it first if it is not cached."""
        return self.get_frame_id_on_demand()

    def set_media_emulation(self, media_type: str = "print"):
        """Set media emulation for the page."""
        return self.send("Emulation.setEmulatedMedia", {"media": media_type})

    def set_cookies(self):
        """Copy the current ``sid`` session cookie into the page.

        Does nothing unless there is a session with a ``sid`` and
        ``frappe.local`` has a ``request`` attribute.

        Raises:
            RuntimeError: if enabling the network agent, setting the cookie
                or disabling the agent reports an error.
        """
        if not (frappe.session and frappe.session.sid and hasattr(frappe.local, "request")):
            return

        # Drop the port: only the host part is used as the cookie domain.
        domain = frappe.utils.get_host_name().split(":", 1)[0]
        cookie = {
            "name": "sid",
            "value": frappe.session.sid,
            "domain": domain,
            "sameSite": "Strict",
        }
        self._unwrap(self.send("Network.enable"), "Error enabling network")
        self._unwrap(self.send("Network.setCookie", cookie), "Error setting cookie")
        self._unwrap(self.send("Network.disable"), "Error disabling network")

    def intercept_request_and_fulfill(self, url_pattern):
        """Arm interception for ``url_pattern`` and return a waiter.

        The returned callable blocks until a matching request is paused,
        fulfils it with status 200 and removes the listener.

        NOTE(review): ``Fetch.*`` commands here go through ``session.send``
        without this page's session id (unlike ``self.send``) -- confirm this
        is intentional.
        """

        def on_request_paused_event(future, response):
            """Resolve the listener future with the paused request's id."""
            params = response.get("params")
            if params and params.get("requestId") and not future.done():
                future.set_result(params["requestId"])

        # Start listening before enabling interception so no event is missed.
        event = self.session.start_listener(
            "Fetch.requestPaused", on_request_paused_event, self.session_id, self.target_id, self.frame_id
        )
        self.session.send("Fetch.enable", {"patterns": [{"urlPattern": url_pattern}]})

        def intercept_and_fulfill():
            try:
                self.session.wait_for_event(event)
                self.session.send(
                    "Fetch.fulfillRequest",
                    {"requestId": event[1].result(), "responseCode": 200},
                    return_future=True,
                )
            finally:
                # Always unregister, even if waiting or fulfilling failed.
                self.session.remove_listener("Fetch.requestPaused", event)

        return intercept_and_fulfill

    @staticmethod
    def _local_resource_path(url, host_url):
        """Map ``url`` to a decoded ``assets/...`` or ``files/...`` path.

        Returns ``None`` when the URL is not under ``host_url``, is not an
        assets/files path, or decodes to something containing a ``..``
        segment or a NUL byte (which could escape the intended directories).
        """
        if not host_url or not url.startswith(host_url):
            return None
        # Strip only the leading host and any "?v..." cache-busting suffix.
        path = url[len(host_url):].split("?v", 1)[0]
        if not path.startswith(("assets/", "files/")):
            return None
        path = urllib.parse.unquote(path)
        if "\x00" in path or ".." in path.replace("\\", "/").split("/"):
            return None
        return path

    def intercept_request_for_local_resources(self, url_pattern="*"):
        """Serve ``assets/`` and ``files/`` requests straight from disk.

        Every paused request whose URL maps to a readable local file is
        fulfilled with the file content (base64, as returned by
        ``frappe.read_file``); everything else is continued unchanged.
        """

        def on_request_paused_event(future, response):
            """Fulfil the paused request locally or let it continue."""
            params = response.get("params")
            request_id = params.get("requestId") if params else None
            if not request_id:
                # Nothing to fulfil or continue without a request id.
                return

            path = self._local_resource_path(params["request"]["url"], get_host_url())
            if path:
                if path.startswith("files/"):
                    path = frappe.utils.get_site_path("public", path)
                content = frappe.read_file(path, as_base64=True)
                if content:
                    response_headers = []
                    # write logic to handle all file types as required
                    if path.endswith(".svg"):
                        response_headers.append({"name": "Content-Type", "value": "image/svg+xml"})
                    self.session.send(
                        "Fetch.fulfillRequest",
                        {
                            "requestId": request_id,
                            "responseCode": 200,  # actually handle the response code from the request
                            "responseHeaders": response_headers,
                            "body": content,
                        },
                        return_future=True,
                    )
                    return

            self.session.send(
                "Fetch.continueRequest",
                {"requestId": request_id},
                return_future=True,
            )

        # Start listening for requestPaused event
        self.session.start_listener(
            "Fetch.requestPaused", on_request_paused_event, self.session_id, self.target_id, self.frame_id
        )
        # Enable request interception for the specified URL pattern
        self.session.send("Fetch.enable", {"patterns": [{"urlPattern": url_pattern}]})

    def set_tab_url(self, url):
        """Navigate to a URL and fulfill the request with status code 200.

        Stores a ``wait_for_navigate`` callable on the instance; calling it
        waits for the navigate command (3 passed as the wait timeout) and
        then for the ``load`` lifecycle event.
        """
        # Intercept and fulfill request with 200 status code
        wait_and_fulfill = self.intercept_request_and_fulfill(url)
        # The lifecycle listener must exist before navigation starts.
        wait_start = self.wait_for_load(wait_for="load")
        page_navigate = self.send("Page.navigate", {"url": url}, return_future=True)
        wait_and_fulfill()

        def wait_for_navigate():
            self.session.wait_for_event(page_navigate, 3)
            wait_start()

        self.wait_for_navigate = wait_for_navigate

    def evaluate(self, expression, await_promise=False, retries=3, retry_delay=0.5):
        """Evaluate a JavaScript expression in the page.

        Args:
            expression: source passed to ``Runtime.evaluate``.
            await_promise: forwarded as ``awaitPromise``.
            retries: extra attempts made after a failed first attempt.
            retry_delay: seconds slept before each retry.

        Returns:
            The result of the first attempt that reports no error.

        Raises:
            RuntimeError: if every attempt reports an error.
        """

        def attempt():
            return self.send(
                "Runtime.evaluate", {"expression": expression, "awaitPromise": await_promise}
            )

        self.send("Runtime.enable")
        try:
            result, error = attempt()
            # retry on error (just safe guard as i had few edge cases where it failed).
            # waiting for network is still slower than this.
            for _attempt in range(retries):
                if not error:
                    break
                print(f"Error evaluating expression: {error}. Retrying in {retry_delay * 1000:.0f}ms")
                time.sleep(retry_delay)
                result, error = attempt()
            if error:
                raise RuntimeError(f"Error evaluating expression: {error}")
            return result
        finally:
            self.send("Runtime.disable")

    # set wait_for to networkIdle if pdf is not rendering correctly.
    # if you face header Height to be incorrect as some external script is changing elements.
    # networkIdle is most stable option but make it a lot slower so avoiding for now. enable if not stable
    def set_content(self, html, wait_for=None):
        """Replace the document with ``html``.

        Local-resource interception is armed first. The waiter for the
        requested lifecycle events (default ``load`` and
        ``DOMContentLoaded``) is stored on ``self.wait_for_set_content``.
        """
        if not wait_for:
            wait_for = ["load", "DOMContentLoaded"]
        self.intercept_request_for_local_resources()
        wait_start = self.wait_for_load(wait_for=wait_for)
        self.send("Page.setDocumentContent", {"frameId": self._ensure_frame_id(), "html": html})
        self.wait_for_set_content = wait_start

    def wait_for_load(self, wait_for, timeout=60):
        """Register a lifecycle listener and return a waiter for it.

        Args:
            wait_for: one lifecycle event name, or an iterable of names that
                must all have been seen.
            timeout: passed to ``session.wait_for_event``.

        Returns:
            A callable that waits for the events and then removes the
            listener.
        """
        self.send("Page.setLifecycleEventsEnabled", {"enabled": True})

        if wait_for is None:
            pending = set()
        elif isinstance(wait_for, str):
            pending = {wait_for}
        else:
            pending = set(wait_for)

        def on_lifecycle_event(future, response):
            """Tick off the seen event; resolve once none are outstanding."""
            pending.discard(response.get("params", {}).get("name"))
            if not pending and not future.done():
                future.set_result(response)

        event = self.session.start_listener(
            "Page.lifecycleEvent", on_lifecycle_event, self.session_id, self.target_id, self.frame_id
        )

        def start_wait():
            try:
                self.session.wait_for_event(event, timeout)
            finally:
                # Unregister even if waiting failed, so listeners don't pile up.
                self.session.remove_listener("Page.lifecycleEvent", event)

        return start_wait

    def get_element_height(self, selector="body"):
        """Return the box-model height of the first element matching a selector.

        When ``self.is_print_designer`` is falsy the selector is replaced by
        ``".wrapper"``.

        Raises:
            RuntimeError: if any of the DOM commands reports an error.
        """
        if not self.is_print_designer:
            selector = ".wrapper"
        self.send("DOM.enable")
        try:
            document = self._unwrap(self.send("DOM.getDocument"), "Error getting document node")
            node = self._unwrap(
                self.send("DOM.querySelector", {"nodeId": document["root"]["nodeId"], "selector": selector}),
                "Error querying selector",
            )
            box = self._unwrap(
                self.send("DOM.getBoxModel", {"nodeId": node["nodeId"]}),
                "Error getting box model",
            )
            return box["model"]["height"]
        finally:
            self.send("DOM.disable")

    def add_page_size_css(self):
        """Inject an ``@page`` rule built from ``self.options``.

        Paper size and margins are read from ``self.options`` and emitted
        with an ``in`` unit suffix.

        Raises:
            RuntimeError: if enabling the agents, creating the stylesheet or
                setting its text reports an error.
        """
        options = self.options
        width = f"{options['paperWidth']}in"
        height = f"{options['paperHeight']}in"
        margin_left = f"{options['marginLeft']}in"
        margin_right = f"{options['marginRight']}in"
        margin_top = f"{options['marginTop']}in"
        margin_bottom = f"{options['marginBottom']}in"

        # Define the CSS rule for the page size
        css_rule = f"""
            @page {{
                size: {width} {height};
                margin: {margin_top} {margin_right} {margin_bottom} {margin_left};
            }}
        """

        try:
            # Enable DOM and CSS agents
            self._unwrap(self.send("DOM.enable"), "Error enabling DOM")
            self._unwrap(self.send("CSS.enable"), "Error enabling CSS")
            # Create a new stylesheet
            sheet = self._unwrap(
                self.send("CSS.createStyleSheet", {"frameId": self._ensure_frame_id()}),
                "Error creating stylesheet",
            )
            # Apply the CSS rule to the created stylesheet
            self._unwrap(
                self.send("CSS.setStyleSheetText", {"styleSheetId": sheet["styleSheetId"], "text": css_rule}),
                "Error setting stylesheet text",
            )
        finally:
            # Turn the agents off on the error path too.
            self.send("CSS.disable")
            self.send("DOM.disable")

    def generate_pdf(self, wait_for_pdf=True, raw=False):
        """Print the page to PDF using ``self.options``.

        Args:
            wait_for_pdf: when falsy, only start printing, store the pending
                handle on ``self.wait_for_pdf`` and return ``None``.
            raw: forwarded to :meth:`get_pdf_from_stream`.

        Raises:
            RuntimeError: if ``Page.printToPDF`` reports an error.
            ValueError: if the result carries no ``stream`` handle.
        """
        self.add_page_size_css()
        if not wait_for_pdf:
            self.wait_for_pdf = self.send("Page.printToPDF", self.options, return_future=True)
            return None

        result = self._unwrap(self.send("Page.printToPDF", self.options), "Error generating PDF")
        if "stream" not in result:
            raise ValueError("Stream handle not returned from Page.printToPDF")
        return self.get_pdf_from_stream(result["stream"], raw)

    def get_pdf_stream_id(self):
        """Wait for a deferred ``generate_pdf`` and return its stream handle."""
        # wait for task to complete
        self.session.wait_for_event(self.wait_for_pdf)
        # wait for event to complete
        pending = self.wait_for_pdf.result()
        response = pending.result()
        return response["result"]["stream"]

    def get_pdf_from_stream(self, stream_id, raw=False):
        """Read a whole IO stream and return the PDF.

        Args:
            stream_id: handle passed to ``IO.read`` / ``IO.close``.
            raw: return the bytes instead of a ``pypdf.PdfReader``.

        Raises:
            RuntimeError: if reading a chunk or closing the stream reports
                an error.
        """
        chunks = []
        offset = 0
        while True:
            chunk = self._unwrap(
                self.send("IO.read", {"handle": stream_id, "offset": offset, "size": 4096}),
                "Error reading PDF chunk",
            )
            chunk_data = chunk["data"]
            if chunk.get("base64Encoded", False):
                chunk_data = base64.b64decode(chunk_data)
            elif isinstance(chunk_data, str):
                # Plain-text chunks arrive as str; convert so they can be
                # joined with the base64-decoded (bytes) chunks.
                chunk_data = chunk_data.encode("utf-8")
            chunks.append(chunk_data)
            offset += len(chunk_data)
            if chunk.get("eof", False):
                break

        self._unwrap(self.send("IO.close", {"handle": stream_id}), "Error closing PDF stream")

        pdf_data = b"".join(chunks)
        if raw:
            return pdf_data

        # Imported lazily so raw reads do not need pypdf.
        from io import BytesIO

        from pypdf import PdfReader

        return PdfReader(BytesIO(pdf_data))

    def close(self):
        """Disable request interception and close this page's target.

        Raises:
            RuntimeError: if ``Target.closeTarget`` reports an error.
        """
        self.session.send("Fetch.disable")
        self._unwrap(
            self.send("Target.closeTarget", {"targetId": self.target_id}),
            "Error closing target",
        )