# Extracted from the patch "handle the escape sequence in the html2text
# library (#3272)" (commit bfb181333c1b) against frappe/utils/xlsxutils.py.
# The same patch makes make_xlsx() call handle_html(item) for string cells
# and removes the module-level ``import html2text``.


def _strip_heading_marker(line):
    """Remove a leading Markdown heading marker ("# ", "## ", ...) from *line*."""
    without_hashes = line.lstrip("#")
    # Only treat the hashes as a heading marker when they are followed by a
    # space; "#tag" or a "#" in the middle of the text is left untouched.
    if without_hashes != line and without_hashes.startswith(" "):
        return without_hashes[1:]
    return line


def handle_html(data):
    """Return *data* as plain text suitable for a spreadsheet cell.

    Args:
        data: A string that may contain HTML markup and/or HTML entities.
            Falsy values (``None``, ``""``) are treated as empty.

    Returns:
        The text with entities unescaped and markup converted by html2text.
        Leading heading markers are removed from each line, blank lines are
        dropped and the remaining lines are joined with ``"\\n"``. Strings
        that contain neither ``<`` nor ``&`` are returned unchanged.
    """
    if not data:
        return ""

    # Nothing that looks like markup or an entity: skip the converter so
    # plain values (e.g. "Item # 5") are not rewritten.
    if "<" not in data and "&" not in data:
        return data

    # Imported lazily, as in the original, so the module loads without
    # html2text until an HTML value is actually encountered.
    from html2text import HTML2Text

    # First pass: turn escaped markup/entities into real characters.
    # NOTE(review): relies on HTML2Text.unescape, exactly as the original
    # did -- confirm the installed html2text version still provides it.
    unescaper = HTML2Text()
    unescaper.unicode_snob = True
    html = unescaper.unescape(data)

    # Second pass: convert the markup itself.
    converter = HTML2Text()
    converter.ignore_links = True
    converter.body_width = 0
    converted = converter.handle(html)

    # Keep every non-empty line instead of only the first one, so that
    # multi-paragraph content is not silently truncated.
    cleaned = (
        _strip_heading_marker(line.rstrip())
        for line in converted.splitlines()
    )
    return "\n".join(line for line in cleaned if line)