feat: specify row compression for tables (#31361)

Certain tables contain A LOT of duplicate data, so it makes sense to enable
the compressed row format on them by default. I've seen a 5-10 fold reduction
in DB size after enabling the compressed format on a select few tables.

This has some performance overhead:
- Both compressed and uncompressed pages live in the buffer pool.
- Pages have to be compressed/decompressed.

Note:
- These cons don't apply much to the DocTypes I am enabling this for.
- I am not enabling this on existing sites, migration can take a long
  time! Do it manually with `transform-database` command if you want to.
This commit is contained in:
Ankush Menat 2025-02-21 09:44:40 +05:30 committed by GitHub
parent 7f0aa436f3
commit f90a450bd4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 52 additions and 11 deletions

View file

@ -372,7 +372,7 @@
"idx": 1,
"links": [],
"make_attachments_public": 1,
"modified": "2024-03-23 16:01:30.219380",
"modified": "2025-02-20 19:19:29.427081",
"modified_by": "Administrator",
"module": "Core",
"name": "Communication",
@ -427,6 +427,7 @@
"role": "All"
}
],
"row_format": "Compressed",
"search_fields": "subject",
"sender_field": "sender",
"sort_field": "creation",
@ -436,4 +437,4 @@
"title_field": "subject",
"track_changes": 1,
"track_seen": 1
}
}

View file

@ -60,7 +60,7 @@
],
"in_create": 1,
"links": [],
"modified": "2024-03-23 16:02:17.664513",
"modified": "2025-02-20 19:22:00.734438",
"modified_by": "Administrator",
"module": "Core",
"name": "Deleted Document",
@ -73,6 +73,7 @@
"role": "System Manager"
}
],
"row_format": "Compressed",
"sort_field": "creation",
"sort_order": "DESC",
"states": [],

View file

@ -94,6 +94,7 @@
"advanced",
"engine",
"migration_hash",
"row_format",
"connections_tab"
],
"fields": [
@ -671,6 +672,14 @@
"fieldname": "fields_tab",
"fieldtype": "Tab Break",
"label": "Fields"
},
{
"default": "Dynamic",
"fieldname": "row_format",
"fieldtype": "Select",
"hidden": 1,
"label": "Row Format",
"options": "Dynamic\nCompressed"
}
],
"icon": "fa fa-bolt",
@ -753,7 +762,7 @@
"link_fieldname": "reference_doctype"
}
],
"modified": "2024-11-30 16:09:21.536704",
"modified": "2025-02-20 19:05:52.119679",
"modified_by": "Administrator",
"module": "Core",
"name": "DocType",

View file

@ -158,6 +158,7 @@ class DocType(Document):
read_only: DF.Check
restrict_to_domain: DF.Link | None
route: DF.Data | None
row_format: DF.Literal["Dynamic", "Compressed"]
search_fields: DF.Data | None
sender_field: DF.Data | None
sender_name_field: DF.Data | None

View file

@ -25,6 +25,7 @@ from frappe.desk.form.load import getdoc
from frappe.model.delete_doc import delete_controllers
from frappe.model.sync import remove_orphan_doctypes
from frappe.tests import IntegrationTestCase, UnitTestCase
from frappe.utils import get_table_name
class UnitTestDoctype(UnitTestCase):
@ -806,6 +807,30 @@ class TestDocType(IntegrationTestCase):
doc.submit()
frappe.get_meta(doctype.name).as_dict()
def test_row_compression(self):
    """Check that a DocType's ``row_format`` is applied to its database table.

    Creates one DocType with ``row_format="Compressed"`` and one without an
    explicit row format, then reads each table's ``row_format`` from
    ``information_schema.tables`` and compares it case-insensitively.
    """
    if frappe.db.db_type != "mariadb":
        # Report a skip instead of returning silently, which the runner
        # would otherwise count as a passing test.
        self.skipTest("test_row_compression only applies to MariaDB")

    compressed_dt = new_doctype(row_format="Compressed").insert().name
    dynamic_dt = new_doctype().insert().name

    information_schema = frappe.qb.Schema("information_schema")

    def get_format(dt):
        """Return the upper-cased row format reported for ``dt``'s table."""
        rows = (
            frappe.qb.from_(information_schema.tables)
            .select("row_format")
            .where(
                (information_schema.tables.table_schema == frappe.conf.db_name)
                & (information_schema.tables.table_name == get_table_name(dt))
            )
            .run()
        )
        # Fail with a readable message rather than an IndexError when the
        # table has no entry in information_schema.
        self.assertTrue(rows, f"No information_schema.tables entry found for {dt}")
        return rows[0][0].upper()

    self.assertEqual(get_format(compressed_dt), "COMPRESSED")
    self.assertEqual(get_format(dynamic_dt), "DYNAMIC")
def new_doctype(
name: str | None = None,

View file

@ -53,7 +53,7 @@
"idx": 1,
"in_create": 1,
"links": [],
"modified": "2024-03-23 16:04:01.627592",
"modified": "2025-02-20 19:20:33.616072",
"modified_by": "Administrator",
"module": "Core",
"name": "Version",
@ -73,6 +73,7 @@
}
],
"quick_entry": 1,
"row_format": "Compressed",
"sort_field": "creation",
"sort_order": "DESC",
"states": [],

View file

@ -33,7 +33,7 @@
}
],
"links": [],
"modified": "2024-03-23 16:04:01.764239",
"modified": "2025-02-20 19:21:33.012224",
"modified_by": "Administrator",
"module": "Core",
"name": "View Log",
@ -50,6 +50,7 @@
}
],
"quick_entry": 1,
"row_format": "Compressed",
"sort_field": "creation",
"sort_order": "DESC",
"states": []

View file

@ -63,7 +63,7 @@ class MariaDBTable(DBTable):
idx int not null default '0',
{additional_definitions})
ENGINE={engine}
ROW_FORMAT=DYNAMIC
ROW_FORMAT={(self.meta.get("row_format") or "Dynamic").upper()}
CHARACTER SET=utf8mb4
COLLATE=utf8mb4_unicode_ci"""

View file

@ -154,7 +154,7 @@
"idx": 1,
"in_create": 1,
"links": [],
"modified": "2024-03-23 16:03:24.379339",
"modified": "2025-02-20 19:21:09.652451",
"modified_by": "Administrator",
"module": "Email",
"name": "Email Queue",
@ -170,6 +170,7 @@
"role": "System Manager"
}
],
"row_format": "Compressed",
"sort_field": "creation",
"sort_order": "DESC",
"states": [],

View file

@ -24,8 +24,8 @@
"fieldname": "path",
"fieldtype": "Data",
"label": "Path",
"set_only_once": 1,
"search_index": 1
"search_index": 1,
"set_only_once": 1
},
{
"fieldname": "referrer",
@ -94,7 +94,7 @@
],
"in_create": 1,
"links": [],
"modified": "2024-03-23 16:04:02.743377",
"modified": "2025-02-20 19:20:47.267461",
"modified_by": "Administrator",
"module": "Website",
"name": "Web Page View",
@ -115,6 +115,7 @@
],
"quick_entry": 1,
"read_only": 1,
"row_format": "Compressed",
"sort_field": "creation",
"sort_order": "DESC",
"states": [],