perf: "random" naming to improve concurrency and locality (#30053)

This feels overengineered, and it kind of is, but other efforts to
introduce sequential naming/UUID naming haven't been that fruitful
either.

The 10 character random "hash" is now changed to:

1. First character - last character of the UUID4 ID of the current request/job.
2. Next three characters - derived from the current timestamp.
3. Last six characters - random data.

This satisfies all three requirements:

1. Readers - temporal locality should result in spatial locality on disk. (fewer pages accessed)
2. Single writer - temporal locality should result in spatial locality. (fewer dirty pages)
3. Multiple writers - temporal locality should NOT result in spatial locality. (less lock contention)

Mostly concludes https://github.com/frappe/frappe/pull/25309 and https://github.com/frappe/frappe/pull/28349

Rough probability numbers

Assumptions:
- Unique per-worker prefixes - 16 possible values (UUIDs are base16)
- Rough time spent generating names - 10% of the request (a very conservative estimate)

Probability(collision) = P(at least one prefix collision) * P(time collision)
Probability(collision) = (1 - P(all different)) * 10%
Probability(collision) = (1 - (16! / (16 - N)!) / 16^N) * 10%
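The table that follows can be reproduced with a short birthday-problem calculation (a sketch; `math.perm(16, n)` computes the falling factorial 16!/(16-N)!):

```python
from math import perm


def collision_probability(n: int, time_overlap: float = 0.10) -> float:
    # birthday problem over the 16 possible one-character hex prefixes,
    # scaled by the assumed 10% chance of overlapping in time
    p_all_different = perm(16, n) / 16**n
    return (1 - p_all_different) * time_overlap


for n in range(1, 9):
    print(f"{n}: {collision_probability(n):.1%}")
```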

| N (concurrency) | Probability(collision) |
| --- | --- |
| 1 | 0.0% |
| 2 | 0.6% |
| 3 | 1.8% |
| 4 | 3.3% |
| 5 | 5.0% |
| 6 | 6.6% |
| 7 | 7.9% |
| 8 | 8.8% |
Ankush Menat 2025-01-25 12:34:09 +05:30 committed by GitHub
parent 473ff81a31
commit 9b79dfeb7b
GPG key ID: B5690EEEBB952194
3 changed files with 11 additions and 5 deletions


@@ -14,6 +14,7 @@ import uuid_utils
 import frappe
 from frappe import _
 from frappe.model import log_types
+from frappe.monitor import get_trace_id
 from frappe.query_builder import DocType
 from frappe.utils import cint, cstr, now_datetime
@@ -281,7 +282,7 @@ def make_autoname(key="", doctype="", doc="", *, ignore_validate=False):
 	DE/09/01/00001 where 09 is the year, 01 is the month and 00001 is the series
 	"""
 	if key == "hash":
-		return _generate_random_string(10)
+		return (_get_timestamp_prefix() + _generate_random_string(7))[:10]
 	series = NamingSeries(key)
 	return series.generate_next_name(doc, ignore_validate=ignore_validate)
@@ -291,7 +292,14 @@ def _get_timestamp_prefix():
 	ts = int(time.time() * 10)  # time in deciseconds
 	# we ~~don't need~~ can't get ordering over entire lifetime, so we wrap the time.
 	ts = ts % (32**4)
-	return base64.b32hexencode(ts.to_bytes(length=5, byteorder="big")).decode()[-4:].lower()
+	ts_part = base64.b32hexencode(ts.to_bytes(length=5, byteorder="big")).decode()[-3:].lower()
+	# First character is from request/job specific UUID, all documents created in this "session" will
+	# have same prefix. This avoids collision between parallel jobs with reasonable probabililistic
+	# guarantees.
+	request_part = (get_trace_id() or "")[-1:]
+	return request_part + ts_part


 def _generate_random_string(length=10):


@@ -83,7 +83,7 @@ class Monitor:
 			self.data.job.scheduled = True
 		if job := rq.get_current_job():
-			self.data.uuid = job.id
+			self.data.job_id = job.id
 			waitdiff = self.data.timestamp - job.enqueued_at.replace(tzinfo=datetime.timezone.utc)
 			self.data.job.wait = int(waitdiff.total_seconds() * 1000000)


@@ -2,7 +2,6 @@
 # License: MIT. See LICENSE
 import time
 import unittest
-from uuid import UUID
 import uuid_utils
@@ -407,7 +406,6 @@ class TestNaming(IntegrationTestCase):
 		expected_name = "TODO-" + nowdate().split("-")[1] + "-" + "0001"
 		self.assertEqual(name, expected_name)

+	@unittest.skip("This is not supported anymore, see #28349.")
 	@retry(
 		retry=retry_if_exception_type(AssertionError),
 		stop=stop_after_attempt(3),