fix: don't try to scan encrypted PDFs

Also fix the reversed logic of the JS check.

Signed-off-by: Akhil Narang <me@akhilnarang.dev>
2025-09-10 15:40:54 +05:30 · 2025-09-10 15:40:54 +05:30 · 36ccf32ab3
commit 36ccf32ab3
parent 38a7ab4be8
2 changed files with 21 additions and 9 deletions
--- a/frappe/core/doctype/file/file.py
+++ b/frappe/core/doctype/file/file.py
@@ -390,8 +390,8 @@ class File(Document):
 			)

 	def check_content(self):
-		if self.file_type == "PDF" and self._content and not pdf_contains_js(self._content):
-			frappe.throw(_("PDF cannot be uploaded, It contains unsafe content"))
+		if self.file_type == "PDF" and self._content and pdf_contains_js(self._content):
+			frappe.throw(_("This PDF cannot be uploaded as it contains unsafe content."))

 	def validate_duplicate_entry(self):
 		if not self.flags.ignore_duplicate_entry_error and not self.is_folder:
--- a/frappe/utils/pdf.py
+++ b/frappe/utils/pdf.py
@@ -14,7 +14,7 @@ import pdfkit
 pdfkit.source.unicode = str  # NOTE: upstream bug; PYTHONOPTIMIZE=1 optimized this away
 from bs4 import BeautifulSoup
 from packaging.version import Version
-from pypdf import PdfReader, PdfWriter
+from pypdf import PdfReader, PdfWriter, errors

 import frappe
 from frappe import _
@@ -386,7 +386,16 @@ def get_wkhtmltopdf_version():
 	return wkhtmltopdf_version or "0"


-def pdf_contains_js(file_content):
+def pdf_contains_js(file_content: bytes):
+	"""
+	Check if a PDF file contains JavaScript.
+
+	Args:
+		file_content (bytes): The content of the PDF file.
+
+	Returns:
+		bool: True if the PDF contains JavaScript, False otherwise and also if the file is encrypted.
+	"""
 	from io import BytesIO

 	reader = PdfReader(BytesIO(file_content))
@@ -406,10 +415,13 @@ def pdf_contains_js(file_content):

 	root = reader.trailer.get("/Root", {})
 	if has_javascript(root):
-		return False
+		return True

-	for page in reader.pages:
-		if has_javascript(page):
-			return False
+	try:
+		for page in reader.pages:
+			if has_javascript(page):
+				return True
+	except errors.FileNotDecryptedError:
+		pass

-	return True
+	return False