fix: don't try to scan encrypted PDFs
Also fix reversed logic of JS check

Signed-off-by: Akhil Narang <me@akhilnarang.dev>
This commit is contained in:
parent
38a7ab4be8
commit
36ccf32ab3
2 changed files with 21 additions and 9 deletions
|
|
@ -390,8 +390,8 @@ class File(Document):
|
|||
)
|
||||
|
||||
def check_content(self):
|
||||
if self.file_type == "PDF" and self._content and not pdf_contains_js(self._content):
|
||||
frappe.throw(_("PDF cannot be uploaded, It contains unsafe content"))
|
||||
if self.file_type == "PDF" and self._content and pdf_contains_js(self._content):
|
||||
frappe.throw(_("This PDF cannot be uploaded as it contains unsafe content."))
|
||||
|
||||
def validate_duplicate_entry(self):
|
||||
if not self.flags.ignore_duplicate_entry_error and not self.is_folder:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import pdfkit
|
|||
pdfkit.source.unicode = str # NOTE: upstream bug; PYTHONOPTIMIZE=1 optimized this away
|
||||
from bs4 import BeautifulSoup
|
||||
from packaging.version import Version
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf import PdfReader, PdfWriter, errors
|
||||
|
||||
import frappe
|
||||
from frappe import _
|
||||
|
|
@ -386,7 +386,16 @@ def get_wkhtmltopdf_version():
|
|||
return wkhtmltopdf_version or "0"
|
||||
|
||||
|
||||
def pdf_contains_js(file_content):
|
||||
def pdf_contains_js(file_content: bytes):
|
||||
"""
|
||||
Check if a PDF file contains JavaScript.
|
||||
|
||||
Args:
|
||||
file_content (bytes): The content of the PDF file.
|
||||
|
||||
Returns:
|
||||
bool: True if the PDF contains JavaScript, False otherwise and also if the file is encrypted.
|
||||
"""
|
||||
from io import BytesIO
|
||||
|
||||
reader = PdfReader(BytesIO(file_content))
|
||||
|
|
@ -406,10 +415,13 @@ def pdf_contains_js(file_content):
|
|||
|
||||
root = reader.trailer.get("/Root", {})
|
||||
if has_javascript(root):
|
||||
return False
|
||||
return True
|
||||
|
||||
for page in reader.pages:
|
||||
if has_javascript(page):
|
||||
return False
|
||||
try:
|
||||
for page in reader.pages:
|
||||
if has_javascript(page):
|
||||
return True
|
||||
except errors.FileNotDecryptedError:
|
||||
pass
|
||||
|
||||
return True
|
||||
return False
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue