Merge pull request #33675 from sokumon/pdf-issue

fix: scan pdfs before uploading
This commit is contained in:
Soham Kulkarni 2025-09-02 18:41:13 +05:30 committed by GitHub
commit b7129419ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 36 additions and 2 deletions

View file

@ -26,6 +26,7 @@ from frappe.utils import (
)
from frappe.utils.file_manager import is_safe_path
from frappe.utils.image import optimize_image, strip_exif_data
from frappe.utils.pdf import pdf_contains_js
from .exceptions import (
AttachmentLimitReached,
@ -137,8 +138,8 @@ class File(Document):
self.validate_file_path()
self.validate_file_url()
self.validate_file_on_disk()
self.file_size = frappe.form_dict.file_size or self.file_size
self.check_content()
def validate_attachment_references(self):
if not self.attached_to_doctype:
@ -388,6 +389,10 @@ class File(Document):
exc=FileTypeNotAllowed,
)
def check_content(self):
    """Reject PDF uploads whose content fails the embedded-JavaScript scan.

    This runs during validation and again right before the content is
    written to disk. Only documents whose ``file_type`` is ``"PDF"`` are
    scanned; every other type passes through untouched.

    Raises:
        Whatever ``frappe.throw`` raises, when the scan reports unsafe content.
    """
    if self.file_type != "PDF":
        return

    content = getattr(self, "_content", None)
    if not content:
        # Nothing has been loaded for this document (e.g. validation of a
        # record without in-memory content), so there is nothing to scan and
        # nothing that could be written. Skipping avoids handing an empty
        # buffer to the PDF parser.
        return

    if isinstance(content, str):
        # The write path encodes str content before saving; do the same here
        # so the scanner always receives bytes. ``self._content`` itself is
        # left untouched.
        content = content.encode()

    # NOTE: pdf_contains_js() returns a falsy value when JavaScript markers
    # WERE found (its result means "is safe"), hence the negation.
    if not pdf_contains_js(content):
        frappe.throw(_("PDF cannot be uploaded, It contains unsafe content"))
def validate_duplicate_entry(self):
if not self.flags.ignore_duplicate_entry_error and not self.is_folder:
if not self.content_hash:
@ -649,7 +654,7 @@ class File(Document):
if isinstance(self._content, str):
self._content = self._content.encode()
self.check_content()
with open(file_path, "wb+") as f:
f.write(self._content)
os.fsync(f.fileno())

View file

@ -384,3 +384,32 @@ def get_wkhtmltopdf_version():
pass
return wkhtmltopdf_version or "0"
def pdf_contains_js(file_content):
    """Scan a PDF for embedded JavaScript and report whether it is SAFE.

    NOTE: despite the name, the return value is inverted. The function
    returns ``True`` when NO JavaScript marker was found and ``False`` when
    one was found. That convention is kept so existing callers keep working.

    The document catalog (``/Root`` in the trailer) and every page are
    walked, looking for dictionaries that carry a ``/JS`` or ``/JavaScript``
    key. Indirect references are resolved while walking, so markers stored
    in separate PDF objects are found as well.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        bool: ``True`` if no JavaScript marker was found, ``False`` otherwise.
    """
    from io import BytesIO

    reader = PdfReader(BytesIO(file_content))
    js_keys = ("/JS", "/JavaScript")
    # Indirect references already followed, keyed by (object number,
    # generation). Shared by every walk below so that reference cycles
    # (e.g. /Parent <-> /Kids) terminate and nothing is resolved twice.
    seen_refs = set()

    def has_javascript(start):
        """Return True if a JavaScript key is reachable from ``start``."""
        # Explicit stack instead of recursion: nesting depth is controlled by
        # the uploaded file and must not be able to exhaust the call stack.
        pending = [start]
        while pending:
            obj = pending.pop()

            # Duck-typed detection of an indirect reference: assumes such
            # objects expose ``idnum``/``generation`` and ``get_object()``.
            # NOTE(review): confirm against the PDF library version in use.
            idnum = getattr(obj, "idnum", None)
            if idnum is not None:
                ref = (idnum, getattr(obj, "generation", 0))
                if ref in seen_refs:
                    continue
                seen_refs.add(ref)
                obj = obj.get_object()

            if isinstance(obj, dict):
                for key, value in obj.items():
                    if key in js_keys:
                        return True
                    pending.append(value)
            elif isinstance(obj, list):
                pending.extend(obj)
        return False

    root = reader.trailer.get("/Root", {})
    if has_javascript(root):
        return False

    for page in reader.pages:
        if has_javascript(page):
            return False

    return True