fix: re module cache settings - purge & cache size (#21808)

* fix: Remove re cache internals manipulation * fix: Purge re cache after module loads An empty cache works better because we already hold our pre-compiled patterns at the top level of every module. This leaves the cache open for dynamically generated patterns, which have a greater need for it. Over time, workers would converge to this state anyway; this change only reduces the cache hit and eviction effort needed to get there. I'd improve this by executing `re.purge` on every module import, but the added complexity is not worth the tradeoff. I'd prefer it if `re` didn't cache patterns generated by `re.compile`, but I don't see this behaviour or any escape hatches, so this will have to do for now.
2023-07-27 10:40:50 +05:30 · 2023-07-27 10:40:50 +05:30 · 6cb51a536e
commit 6cb51a536e
parent 89b9b64a55
2 changed files with 8 additions and 5 deletions
--- a/frappe/__init__.py
+++ b/frappe/__init__.py
@@ -53,12 +53,8 @@ local = Local()
 cache = None
 STANDARD_USERS = ("Guest", "Administrator")

-_dev_server = int(sbool(os.environ.get("DEV_SERVER", False)))
 _qb_patched = {}
-re._MAXCACHE = (
-	50  # reduced from default 512 given we are already maintaining this on parent worker
-)
-
+_dev_server = int(sbool(os.environ.get("DEV_SERVER", False)))
 _tune_gc = bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True)))

 if _dev_server:
@@ -2450,3 +2446,6 @@ if _tune_gc:
 	# everything else.
 	g0, g1, g2 = gc.get_threshold()  # defaults are 700, 10, 10.
 	gc.set_threshold(g0 * 10, g1 * 2, g2 * 2)
+
+# Remove references to patterns that are pre-compiled and loaded to global scopes.
+re.purge()
--- a/frappe/app.py
+++ b/frappe/app.py
@@ -4,6 +4,7 @@
 import gc
 import logging
 import os
+import re

 from werkzeug.exceptions import HTTPException, NotFound
 from werkzeug.local import LocalManager
@@ -428,6 +429,9 @@ def serve(
 	)


+# Remove references to patterns that are pre-compiled and loaded to global scopes.
+re.purge()
+
 # Both Gunicorn and RQ use forking to spawn workers. In an ideal world, the fork should be sharing
 # most of the memory if there are no writes made to data because of Copy on Write, however,
 # python's GC is not CoW friendly and writes to data even if user-code doesn't. Specifically, the