fix(xlsx): handle unparseable HTML

This commit is contained in:
Rushabh Mehta 2019-03-06 16:11:06 +05:30
parent fd8aad457a
commit 9df07891ff

View file

@@ -61,10 +61,17 @@ def handle_html(data):
obj = HTML2Text()
obj.ignore_links = True
obj.body_width = 0
value = obj.handle(h)
try:
value = obj.handle(h)
except Exception:
# unable to parse html, send it raw
return value
value = ", ".join(value.split(' \n'))
value = " ".join(value.split('\n'))
value = ", ".join(value.split('# '))
return value
def read_xlsx_file_from_attached_file(file_id=None, fcontent=None, filepath=None):