The license.txt file has been replaced with LICENSE for quite a while now. INAL but it didn't seem accurate to say "hey, checkout license.txt although there's no such file". Apart from this, there were inconsistencies in the headers altogether...this change brings consistency.
122 lines
2.8 KiB
Python
122 lines
2.8 KiB
Python
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
|
|
# License: MIT. See LICENSE
|
|
import re
|
|
from io import BytesIO
|
|
|
|
import openpyxl
|
|
import xlrd
|
|
from openpyxl import load_workbook
|
|
from openpyxl.styles import Font
|
|
from openpyxl.utils import get_column_letter
|
|
|
|
import frappe
|
|
|
|
ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')
|
|
|
|
|
|
# return xlsx file object
|
|
def make_xlsx(data, sheet_name, wb=None, column_widths=None):
|
|
column_widths = column_widths or []
|
|
if wb is None:
|
|
wb = openpyxl.Workbook(write_only=True)
|
|
|
|
ws = wb.create_sheet(sheet_name, 0)
|
|
|
|
for i, column_width in enumerate(column_widths):
|
|
if column_width:
|
|
ws.column_dimensions[get_column_letter(i + 1)].width = column_width
|
|
|
|
row1 = ws.row_dimensions[1]
|
|
row1.font = Font(name='Calibri', bold=True)
|
|
|
|
for row in data:
|
|
clean_row = []
|
|
for item in row:
|
|
if isinstance(item, str) and (sheet_name not in ['Data Import Template', 'Data Export']):
|
|
value = handle_html(item)
|
|
else:
|
|
value = item
|
|
|
|
if isinstance(item, str) and next(ILLEGAL_CHARACTERS_RE.finditer(value), None):
|
|
# Remove illegal characters from the string
|
|
value = re.sub(ILLEGAL_CHARACTERS_RE, '', value)
|
|
|
|
clean_row.append(value)
|
|
|
|
ws.append(clean_row)
|
|
|
|
xlsx_file = BytesIO()
|
|
wb.save(xlsx_file)
|
|
return xlsx_file
|
|
|
|
|
|
def handle_html(data):
|
|
# return if no html tags found
|
|
data = frappe.as_unicode(data)
|
|
|
|
if '<' not in data:
|
|
return data
|
|
if '>' not in data:
|
|
return data
|
|
|
|
from html2text import HTML2Text
|
|
|
|
h = HTML2Text()
|
|
h.unicode_snob = True
|
|
h = h.unescape(data or "")
|
|
|
|
obj = HTML2Text()
|
|
obj.ignore_links = True
|
|
obj.body_width = 0
|
|
|
|
try:
|
|
value = obj.handle(h)
|
|
except Exception:
|
|
# unable to parse html, send it raw
|
|
return data
|
|
|
|
value = ", ".join(value.split(' \n'))
|
|
value = " ".join(value.split('\n'))
|
|
value = ", ".join(value.split('# '))
|
|
|
|
return value
|
|
|
|
|
|
def read_xlsx_file_from_attached_file(file_url=None, fcontent=None, filepath=None):
|
|
if file_url:
|
|
_file = frappe.get_doc("File", {"file_url": file_url})
|
|
filename = _file.get_full_path()
|
|
elif fcontent:
|
|
filename = BytesIO(fcontent)
|
|
elif filepath:
|
|
filename = filepath
|
|
else:
|
|
return
|
|
|
|
rows = []
|
|
wb1 = load_workbook(filename=filename, read_only=True, data_only=True)
|
|
ws1 = wb1.active
|
|
for row in ws1.iter_rows():
|
|
tmp_list = []
|
|
for cell in row:
|
|
tmp_list.append(cell.value)
|
|
rows.append(tmp_list)
|
|
return rows
|
|
|
|
|
|
def read_xls_file_from_attached_file(content):
|
|
book = xlrd.open_workbook(file_contents=content)
|
|
sheets = book.sheets()
|
|
sheet = sheets[0]
|
|
rows = []
|
|
for i in range(sheet.nrows):
|
|
rows.append(sheet.row_values(i))
|
|
return rows
|
|
|
|
|
|
def build_xlsx_response(data, filename):
|
|
xlsx_file = make_xlsx(data, filename)
|
|
# write out response as a xlsx type
|
|
frappe.response['filename'] = filename + '.xlsx'
|
|
frappe.response['filecontent'] = xlsx_file.getvalue()
|
|
frappe.response['type'] = 'binary'
|