perf: use base32 space for random names instead of base16 (#25497)

This commit is contained in:
Ankush Menat 2024-03-17 20:02:57 +05:30 committed by GitHub
parent ef6af0b0ad
commit adf24b24d4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 44 additions and 5 deletions

View file

@ -1,6 +1,7 @@
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
import base64
import datetime
import re
import struct
@ -264,15 +265,36 @@ def make_autoname(key="", doctype="", doc="", *, ignore_validate=False):
DE/09/01/00001 where 09 is the year, 01 is the month and 00001 is the series
"""
if key == "hash":
# Makeshift "ULID": first 4 chars are based on timestamp, other 8 are random
ts = hex(struct.unpack("<Q", struct.pack("<d", time.time()))[0])
return ts[-7:-4] + frappe.generate_hash(length=7)
# Makeshift "ULID": first 4 chars are based on timestamp, other 6 are random
return _get_timestamp_prefix() + _generate_random_string(6)
series = NamingSeries(key)
return series.generate_next_name(doc, ignore_validate=ignore_validate)
def _get_timestamp_prefix():
ts = int(time.time() * 10) # time in deciseconds
# we ~~don't need~~ can't get ordering over entire lifetime, so we wrap the time.
ts = ts % (32**4)
return base64.b32hexencode(ts.to_bytes(length=5, byteorder="big")).decode()[-4:].lower()
def _generate_random_string(length=10):
"""Better version of frappe.generate_hash for naming.
This uses entire base32 instead of base16 used by generate_hash. So it has twice as many
characters and hence more likely to have shorter common prefixes. i.e. slighly faster comparisons and less conflicts.
Why not base36?
It's not in standard library else using all characters is probably better approach.
Why not base64?
MySQL is case-insensitive, we can't use both upper and lower case characters.
"""
from secrets import token_bytes as get_random_bytes
return base64.b32hexencode(get_random_bytes(length)).decode()[:length].lower()
def parse_naming_series(
parts: list[str] | str,
doctype=None,

View file

@ -1,7 +1,9 @@
# Copyright (c) 2018, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
from unittest.mock import patch
import time
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_full_jitter
import frappe
from frappe.core.doctype.doctype.test_doctype import new_doctype
@ -11,6 +13,7 @@ from frappe.model.naming import (
append_number_if_name_exists,
determine_consecutive_week_number,
getseries,
make_autoname,
parse_naming_series,
revert_series_if_last,
)
@ -390,6 +393,20 @@ class TestNaming(FrappeTestCase):
expected_name = "TODO-" + nowdate().split("-")[1] + "-" + "0001"
self.assertEqual(name, expected_name)
@retry(
    retry=retry_if_exception_type(AssertionError),
    stop=stop_after_attempt(3),
    wait=wait_full_jitter(),
    reraise=True,
)
def test_hash_naming_is_roughly_sequential(self):
    """Names generated by "hash" autonaming should sort in creation order *most of the time*.

    Ordering is not guaranteed on every run, so an AssertionError is retried
    (up to 3 attempts in total) before the failure is re-raised.
    """
    sample_count = 10
    generated = []
    while len(generated) < sample_count:
        # Space the calls out so each name is generated at a later point in time.
        time.sleep(0.1)
        generated.append(make_autoname("hash"))

    self.assertEqual(generated, sorted(generated))
def parse_naming_series_variable(doc, variable):
if variable == "PM":