refactor: move all optimizations and pre/post fork hooks to separate file (#28832)

Now they will truly execute before/after fork = :pinch: few bytes saved!
This commit is contained in:
Ankush Menat 2024-12-19 16:46:26 +05:30 committed by GitHub
parent a560ba27e4
commit 9e8ab92371
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 110 additions and 80 deletions

View file

@ -12,15 +12,11 @@ Read the documentation: https://frappeframework.com/docs
"""
import copy
import faulthandler
import functools
import gc
import importlib
import inspect
import json
import os
import re
import signal
import sys
import traceback
import warnings
@ -89,7 +85,6 @@ STANDARD_USERS = ("Guest", "Administrator")
_one_time_setup: dict[str, bool] = {}
_dev_server = int(sbool(os.environ.get("DEV_SERVER", False)))
_tune_gc = bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True)))
if _dev_server:
warnings.simplefilter("always", DeprecationWarning)
@ -285,7 +280,6 @@ def init(site: str, sites_path: str = ".", new_site: bool = False, force: bool =
if not _one_time_setup.get(local.conf.db_type):
patch_query_execute()
patch_query_aggregation()
_register_fault_handler()
_one_time_setup[local.conf.db_type] = True
setup_module_map(include_all_apps=not (frappe.request or frappe.job or frappe.flags.in_migrate))
@ -2577,24 +2571,7 @@ def validate_and_sanitize_search_inputs(fn):
return wrapper
def _register_fault_handler():
import io
import frappe.optimizations
from frappe.utils.error import log_error # Backward compatibility
# Some libraries monkey patch stderr, we need actual fd
if isinstance(sys.__stderr__, io.TextIOWrapper):
faulthandler.register(signal.SIGUSR1, file=sys.__stderr__)
from frappe.utils.error import log_error
if _tune_gc:
# generational GC gets triggered after certain allocs (g0) which is 700 by default.
# This number is quite small for frappe where a single query can potentially create 700+
# objects easily.
# Bump this number higher, this will make GC less aggressive but that improves performance of
# everything else.
g0, g1, g2 = gc.get_threshold() # defaults are 700, 10, 10.
gc.set_threshold(g0 * 10, g1 * 2, g2 * 2)
# Remove references to pattern that are pre-compiled and loaded to global scopes.
re.purge()
frappe.optimizations.optimize_all()

View file

@ -35,35 +35,34 @@ _sites_path = os.environ.get("SITES_PATH", ".")
# If gc.freeze is done then importing modules before forking allows us to share the memory
if frappe._tune_gc:
import gettext
import gettext
import babel
import babel.messages
import bleach
import num2words
import pydantic
import babel
import babel.messages
import bleach
import num2words
import pydantic
import frappe.boot
import frappe.client
import frappe.core.doctype.file.file
import frappe.core.doctype.user.user
import frappe.database.mariadb.database # Load database related utils
import frappe.database.query
import frappe.desk.desktop # workspace
import frappe.desk.form.save
import frappe.model.db_query
import frappe.query_builder
import frappe.utils.background_jobs # Enqueue is very common
import frappe.utils.data # common utils
import frappe.utils.jinja # web page rendering
import frappe.utils.jinja_globals
import frappe.utils.redis_wrapper # Exact redis_wrapper
import frappe.utils.safe_exec
import frappe.utils.typing_validations # any whitelisted method uses this
import frappe.website.path_resolver # all the page types and resolver
import frappe.website.router # Website router
import frappe.website.website_generator # web page doctypes
import frappe.boot
import frappe.client
import frappe.core.doctype.file.file
import frappe.core.doctype.user.user
import frappe.database.mariadb.database # Load database related utils
import frappe.database.query
import frappe.desk.desktop # workspace
import frappe.desk.form.save
import frappe.model.db_query
import frappe.query_builder
import frappe.utils.background_jobs # Enqueue is very common
import frappe.utils.data # common utils
import frappe.utils.jinja # web page rendering
import frappe.utils.jinja_globals
import frappe.utils.redis_wrapper # Exact redis_wrapper
import frappe.utils.safe_exec
import frappe.utils.typing_validations # any whitelisted method uses this
import frappe.website.path_resolver # all the page types and resolver
import frappe.website.router # Website router
import frappe.website.website_generator # web page doctypes
# end: module pre-loading
@ -519,20 +518,3 @@ def application_with_statics():
application = StaticDataMiddleware(application, {"/files": str(os.path.abspath(_sites_path))})
return application
# Remove references to pattern that are pre-compiled and loaded to global scopes.
re.purge()
# Both Gunicorn and RQ use forking to spawn workers. In an ideal world, the fork should be sharing
# most of the memory if there are no writes made to data because of Copy on Write, however,
# python's GC is not CoW friendly and writes to data even if user-code doesn't. Specifically, the
# generational GC which stores and mutates every python object: `PyGC_Head`
#
# Calling gc.freeze() moves all the objects imported so far into permanent generation and hence
# doesn't mutate `PyGC_Head`
#
# Refer to issue for more info: https://github.com/frappe/frappe/issues/18927
if frappe._tune_gc:
gc.collect() # clean up any garbage created so far before freeze
gc.freeze()

81
frappe/optimizations.py Normal file
View file

@ -0,0 +1,81 @@
import faulthandler
import gc
import io
import os
import re
import signal
import sys
def optimize_all():
    """Single entry point to enable all optimizations at the right time automatically.

    Notes:
        - This function is ALWAYS executed as soon as `import frappe` ends.
        - Any deferred work should be deferred using the os module's fork hooks.
        - Respect configuration supplied through environment variables.
        - Fork hooks can not be unregistered, so care should be taken to register
          them only when they make sense.
    """
    # Takes effect immediately; independent of the fork hooks below.
    _optimize_gc_parameters()

    # `before` fork hooks are called in REVERSE registration order. Register the
    # GC freeze hook first and the regex-cache purge second so that, at fork
    # time, the purge runs first and the collect + freeze runs on the cleaned-up
    # heap (purge -> collect -> freeze).
    _optimize_gc_for_copy_on_write()
    _optimize_regex_cache()

    # Register for the current process and again in every forked child.
    _register_fault_handler()
    os.register_at_fork(after_in_child=_register_fault_handler)
def _optimize_gc_parameters():
    """Raise the generational GC thresholds unless tuning is switched off.

    Tuning is controlled by the ``FRAPPE_TUNE_GC`` environment variable
    (interpreted through ``frappe.utils.sbool``; treated as ``True`` when unset).
    """
    from frappe.utils import sbool

    tuning_enabled = bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True)))
    if tuning_enabled:
        # Generational GC is triggered after a certain number of allocations
        # (gen0), 700 by default. That is quite small for frappe, where a single
        # query can easily create 700+ objects. Bumping the thresholds makes the
        # GC less aggressive, which improves performance of everything else.
        gen0, gen1, gen2 = gc.get_threshold()  # defaults are 700, 10, 10.
        gc.set_threshold(gen0 * 10, gen1 * 2, gen2 * 2)
def _optimize_regex_cache():
    """Register a before-fork hook that purges the ``re`` module's pattern cache.

    This drops the cache's references to patterns that are pre-compiled and
    loaded into global scopes, leaving the cache for dynamically generated
    regexes.
    """
    purge_pattern_cache = re.purge
    os.register_at_fork(before=purge_pattern_cache)
def _register_fault_handler():
    """Register a ``faulthandler`` handler for ``SIGUSR1`` that writes to the real stderr.

    Nothing is registered when ``sys.__stderr__`` is not an ``io.TextIOWrapper``.
    """
    # Some libraries monkey patch stderr; we need the actual fd, so use
    # sys.__stderr__ rather than sys.stderr.
    real_stderr = sys.__stderr__
    if not isinstance(real_stderr, io.TextIOWrapper):
        return
    faulthandler.register(signal.SIGUSR1, file=real_stderr)
def _optimize_gc_for_copy_on_write():
    """Register ``_freeze_gc`` as a before-fork hook unless GC tuning is switched off.

    Tuning is controlled by the ``FRAPPE_TUNE_GC`` environment variable
    (interpreted through ``frappe.utils.sbool``; treated as ``True`` when unset).
    """
    from frappe.utils import sbool

    tuning_enabled = bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True)))
    if tuning_enabled:
        os.register_at_fork(before=_freeze_gc)
# Set to True once _freeze_gc() has collected and frozen the heap.
_gc_frozen = False


def _freeze_gc():
    """Collect garbage and freeze the GC once; later calls do nothing.

    Both Gunicorn and RQ use forking to spawn workers. In an ideal world the
    fork would share most of its memory through copy-on-write as long as no
    writes are made to the data. Python's GC is not CoW friendly, however: the
    generational GC stores and mutates a `PyGC_Head` for every tracked object
    even when user code writes nothing.

    Calling gc.freeze() moves all the objects created so far into the permanent
    generation, so their `PyGC_Head` is no longer mutated.

    Refer to the issue for more info: https://github.com/frappe/frappe/issues/18927
    """
    global _gc_frozen

    if not _gc_frozen:
        gc.collect()  # clean up any garbage created so far before the freeze
        gc.freeze()
        # RQ workers constantly fork; there's no benefit in repeating this on
        # every fork, so remember that it has been done.
        _gc_frozen = True

View file

@ -298,7 +298,6 @@ def start_worker(
strategy = DequeueStrategy.DEFAULT
_start_sentry()
_freeze_gc()
with frappe.init_site():
# empty init is required to get redis_queue from common_site_config.json
@ -365,11 +364,8 @@ def start_worker_pool(
import frappe.utils.scheduler
import frappe.utils.typing_validations # any whitelisted method uses this
import frappe.website.path_resolver # all the page types and resolver
# end: module pre-loading
_freeze_gc()
with frappe.init_site():
redis_connection = get_redis_conn()
@ -394,12 +390,6 @@ def start_worker_pool(
pool.start(logging_level=logging_level, burst=burst)
def _freeze_gc():
if frappe._tune_gc:
gc.collect()
gc.freeze()
def get_worker_name(queue):
"""When limiting worker to a specific queue, also append queue name to default worker name"""
name = None