refactor: Replace html2text with markdownify

This commit is contained in:
Suraj Shetty 2022-07-06 08:23:14 +05:30
parent 4b6ab45c57
commit d4166dbe20
9 changed files with 18 additions and 15 deletions

View file

@ -1,6 +1,8 @@
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
from markdownify import markdownify as md
import frappe
@ -86,3 +88,8 @@ def ljust_list(_list, length, fill_word=None):
_list.extend([fill_word] * fill_length)
return _list
def html2text(html, strip_links=False, wrap=True):
    """Convert ``html`` to Markdown through markdownify.

    Headings are requested in ATX style. When ``strip_links`` is truthy,
    the ``a`` tag is handed to markdownify's ``strip`` option; otherwise
    ``None`` is passed. ``wrap`` is forwarded to markdownify unchanged.

    Returns whatever ``markdownify`` returns for the given input.
    """
    tags_to_strip = None
    if strip_links:
        tags_to_strip = ["a"]

    return md(html, heading_style="ATX", strip=tags_to_strip, wrap=wrap)

View file

@ -183,7 +183,7 @@ def get_version(doctype, doc_name, frequency, user):
def get_comments(doctype, doc_name, frequency, user):
from html2text import html2text
from frappe.core.utils import html2text
timeline = []
filters = get_filters("reference_name", doc_name, frequency, user)
@ -225,7 +225,7 @@ def get_follow_users(doctype, doc_name):
def get_row_changed(row_changed, time, doctype, doc_name, v):
from html2text import html2text
from frappe.core.utils import html2text
items = []
for d in row_changed:
@ -269,7 +269,7 @@ def get_added_row(added, time, doctype, doc_name, v):
def get_field_changed(changed, time, doctype, doc_name, v):
from html2text import html2text
from frappe.core.utils import html2text
items = []
for d in changed:

View file

@ -8,11 +8,11 @@ import traceback
from email.parser import Parser
from email.policy import SMTPUTF8
from html2text import html2text
from rq.timeouts import JobTimeoutException
import frappe
from frappe import _, safe_encode, task
from frappe.core.utils import html2text
from frappe.email.doctype.email_account.email_account import EmailAccount
from frappe.email.email_body import add_attachment, get_email, get_formatted_html
from frappe.email.queue import get_unsubcribed_url, get_unsubscribe_message

View file

@ -1,8 +1,8 @@
import click
import requests
from html2text import html2text
import frappe
from frappe.core.utils import html2text
def frappecloud_migrator(local_site):

View file

@ -1911,7 +1911,7 @@ def get_string_between(start: str, string: str, end: str) -> str:
def to_markdown(html: str) -> str:
from html.parser import HTMLParser
from html2text import html2text
from frappe.core.utils import html2text
try:
return html2text(html or "")

View file

@ -4,7 +4,6 @@ import json
import mimetypes
import RestrictedPython.Guards
from html2text import html2text
from RestrictedPython import compile_restricted, safe_globals
import frappe
@ -13,6 +12,7 @@ import frappe.integrations.utils
import frappe.utils
import frappe.utils.data
from frappe import _
from frappe.core.utils import html2text
from frappe.frappeclient import FrappeClient
from frappe.handler import execute_cmd
from frappe.model.delete_doc import delete_doc

View file

@ -52,7 +52,7 @@ def make_xlsx(data, sheet_name, wb=None, column_widths=None):
def handle_html(data):
from html2text import HTML2Text
from frappe.core.utils import html2text
# return if no html tags found
data = frappe.as_unicode(data)
@ -62,12 +62,8 @@ def handle_html(data):
h = unescape_html(data or "")
obj = HTML2Text()
obj.ignore_links = True
obj.body_width = 0
try:
value = obj.handle(h)
value = html2text(h, strip_links=True, wrap=False)
except Exception:
# unable to parse html, send it raw
return data

View file

@ -1,8 +1,8 @@
from html2text import html2text
from jinja2 import utils
import frappe
from frappe import _
from frappe.core.utils import html2text
from frappe.utils import sanitize_html
from frappe.utils.global_search import web_search

View file

@ -35,7 +35,6 @@ dependencies = [
"git-url-parse~=1.2.2",
"gitdb~=4.0.7",
"gunicorn~=20.1.0",
"html2text==2020.1.16",
"html5lib~=1.1",
"ipython~=8.4.0",
"ldap3~=2.9",
@ -73,6 +72,7 @@ dependencies = [
"urllib3~=1.26.4",
"xlrd~=2.0.1",
"zxcvbn-python~=4.4.24",
"markdownify~=0.11.2",
# integration dependencies
"boto3~=1.17.53",