perf: use base32 space for random names instead of base16 (#25497)
This commit is contained in:
parent
ef6af0b0ad
commit
adf24b24d4
2 changed files with 44 additions and 5 deletions
|
|
@ -1,6 +1,7 @@
|
|||
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
|
||||
# License: MIT. See LICENSE
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import re
|
||||
import struct
|
||||
|
|
@ -264,15 +265,36 @@ def make_autoname(key="", doctype="", doc="", *, ignore_validate=False):
|
|||
DE/09/01/00001 where 09 is the year, 01 is the month and 00001 is the series
|
||||
"""
|
||||
if key == "hash":
|
||||
# Makeshift "ULID": first 4 chars are based on timestamp, other 8 are random
|
||||
ts = hex(struct.unpack("<Q", struct.pack("<d", time.time()))[0])
|
||||
|
||||
return ts[-7:-4] + frappe.generate_hash(length=7)
|
||||
# Makeshift "ULID": first 4 chars are based on timestamp, other 6 are random
|
||||
return _get_timestamp_prefix() + _generate_random_string(6)
|
||||
|
||||
series = NamingSeries(key)
|
||||
return series.generate_next_name(doc, ignore_validate=ignore_validate)
|
||||
|
||||
|
||||
def _get_timestamp_prefix():
|
||||
ts = int(time.time() * 10) # time in deciseconds
|
||||
# we ~~don't need~~ can't get ordering over entire lifetime, so we wrap the time.
|
||||
ts = ts % (32**4)
|
||||
return base64.b32hexencode(ts.to_bytes(length=5, byteorder="big")).decode()[-4:].lower()
|
||||
|
||||
|
||||
def _generate_random_string(length=10):
|
||||
"""Better version of frappe.generate_hash for naming.
|
||||
|
||||
This uses entire base32 instead of base16 used by generate_hash. So it has twice as many
|
||||
characters and hence more likely to have shorter common prefixes. i.e. slighly faster comparisons and less conflicts.
|
||||
|
||||
Why not base36?
|
||||
It's not in standard library else using all characters is probably better approach.
|
||||
Why not base64?
|
||||
MySQL is case-insensitive, we can't use both upper and lower case characters.
|
||||
"""
|
||||
from secrets import token_bytes as get_random_bytes
|
||||
|
||||
return base64.b32hexencode(get_random_bytes(length)).decode()[:length].lower()
|
||||
|
||||
|
||||
def parse_naming_series(
|
||||
parts: list[str] | str,
|
||||
doctype=None,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
# Copyright (c) 2018, Frappe Technologies Pvt. Ltd. and Contributors
|
||||
# License: MIT. See LICENSE
|
||||
|
||||
from unittest.mock import patch
|
||||
import time
|
||||
|
||||
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_full_jitter
|
||||
|
||||
import frappe
|
||||
from frappe.core.doctype.doctype.test_doctype import new_doctype
|
||||
|
|
@ -11,6 +13,7 @@ from frappe.model.naming import (
|
|||
append_number_if_name_exists,
|
||||
determine_consecutive_week_number,
|
||||
getseries,
|
||||
make_autoname,
|
||||
parse_naming_series,
|
||||
revert_series_if_last,
|
||||
)
|
||||
|
|
@ -390,6 +393,20 @@ class TestNaming(FrappeTestCase):
|
|||
expected_name = "TODO-" + nowdate().split("-")[1] + "-" + "0001"
|
||||
self.assertEqual(name, expected_name)
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type(AssertionError),
|
||||
stop=stop_after_attempt(3),
|
||||
wait=wait_full_jitter(),
|
||||
reraise=True,
|
||||
)
|
||||
def test_hash_naming_is_roughly_sequential(self):
|
||||
"""hash naming is supposed to be sequential *most of the time*"""
|
||||
names = []
|
||||
for _ in range(10):
|
||||
time.sleep(0.1)
|
||||
names.append(make_autoname("hash"))
|
||||
self.assertEqual(names, sorted(names))
|
||||
|
||||
|
||||
def parse_naming_series_variable(doc, variable):
|
||||
if variable == "PM":
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue