diff --git a/frappe/core/doctype/file/file.py b/frappe/core/doctype/file/file.py
index d111498c49..e5a9f86c8b 100755
--- a/frappe/core/doctype/file/file.py
+++ b/frappe/core/doctype/file/file.py
@@ -26,6 +26,7 @@ from frappe.utils import (
 )
 from frappe.utils.file_manager import is_safe_path
 from frappe.utils.image import optimize_image, strip_exif_data
+from frappe.utils.pdf import pdf_contains_js
 
 from .exceptions import (
 	AttachmentLimitReached,
@@ -137,8 +138,8 @@ class File(Document):
 		self.validate_file_path()
 		self.validate_file_url()
 		self.validate_file_on_disk()
-
 		self.file_size = frappe.form_dict.file_size or self.file_size
+		self.check_content()
 
 	def validate_attachment_references(self):
 		if not self.attached_to_doctype:
@@ -388,6 +389,21 @@ class File(Document):
 				exc=FileTypeNotAllowed,
 			)
 
+	def check_content(self):
+		"""Reject the file if it is a PDF that embeds JavaScript."""
+		if self.file_type != "PDF":
+			return
+
+		# validate() may run with no payload loaded; there is nothing to scan then.
+		content = getattr(self, "_content", None)
+		if not content:
+			return
+		if isinstance(content, str):
+			content = content.encode()
+
+		if pdf_contains_js(content):
+			frappe.throw(_("PDF cannot be uploaded, It contains unsafe content"))
+
 	def validate_duplicate_entry(self):
 		if not self.flags.ignore_duplicate_entry_error and not self.is_folder:
 			if not self.content_hash:
@@ -649,7 +665,7 @@ class File(Document):
 
 		if isinstance(self._content, str):
 			self._content = self._content.encode()
-
+		self.check_content()
 		with open(file_path, "wb+") as f:
 			f.write(self._content)
 			os.fsync(f.fileno())
diff --git a/frappe/utils/pdf.py b/frappe/utils/pdf.py
index 9f0469f0b4..8f0d4f7fed 100644
--- a/frappe/utils/pdf.py
+++ b/frappe/utils/pdf.py
@@ -384,3 +384,47 @@ def get_wkhtmltopdf_version():
 		pass
 
 	return wkhtmltopdf_version or "0"
+
+
+def pdf_contains_js(file_content):
+	"""Return True if the PDF in ``file_content`` embeds JavaScript.
+
+	Walks every object reachable from the document catalog (page tree,
+	annotations, name trees, actions, ...) and reports whether any dictionary
+	carries a ``/JS`` or ``/JavaScript`` key.
+
+	:param file_content: Raw bytes of the PDF document.
+	:return: ``True`` if a JavaScript entry is found, otherwise ``False``.
+	"""
+	from io import BytesIO
+
+	reader = PdfReader(BytesIO(file_content))
+
+	# Indirect references already followed, keyed by (object number, generation).
+	# PDF object graphs are cyclic (e.g. /Parent), so each is expanded only once.
+	seen = set()
+	# Explicit work stack rather than recursion, so a deeply nested document
+	# cannot exhaust the interpreter's recursion limit.
+	pending = [reader.trailer.get("/Root")]
+
+	while pending:
+		obj = pending.pop()
+
+		idnum = getattr(obj, "idnum", None)
+		if idnum is not None:
+			ref = (idnum, getattr(obj, "generation", 0))
+			if ref in seen:
+				continue
+			seen.add(ref)
+			# Dictionary values and array items may be unresolved references;
+			# resolve them, otherwise anything stored indirectly is never scanned.
+			obj = obj.get_object()
+
+		if isinstance(obj, dict):
+			if "/JS" in obj or "/JavaScript" in obj:
+				return True
+			pending.extend(obj.values())
+		elif isinstance(obj, list):
+			pending.extend(obj)
+
+	return False