diff --git a/frappe/__init__.py b/frappe/__init__.py index 23c755adbf..6722ad095f 100644 --- a/frappe/__init__.py +++ b/frappe/__init__.py @@ -12,15 +12,11 @@ Read the documentation: https://frappeframework.com/docs """ import copy -import faulthandler import functools -import gc import importlib import inspect import json import os -import re -import signal import sys import traceback import warnings @@ -89,7 +85,6 @@ STANDARD_USERS = ("Guest", "Administrator") _one_time_setup: dict[str, bool] = {} _dev_server = int(sbool(os.environ.get("DEV_SERVER", False))) -_tune_gc = bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True))) if _dev_server: warnings.simplefilter("always", DeprecationWarning) @@ -285,7 +280,6 @@ def init(site: str, sites_path: str = ".", new_site: bool = False, force: bool = if not _one_time_setup.get(local.conf.db_type): patch_query_execute() patch_query_aggregation() - _register_fault_handler() _one_time_setup[local.conf.db_type] = True setup_module_map(include_all_apps=not (frappe.request or frappe.job or frappe.flags.in_migrate)) @@ -2577,24 +2571,7 @@ def validate_and_sanitize_search_inputs(fn): return wrapper -def _register_fault_handler(): - import io +import frappe.optimizations +from frappe.utils.error import log_error # Backward compatibility - # Some libraries monkey patch stderr, we need actual fd - if isinstance(sys.__stderr__, io.TextIOWrapper): - faulthandler.register(signal.SIGUSR1, file=sys.__stderr__) - - -from frappe.utils.error import log_error - -if _tune_gc: - # generational GC gets triggered after certain allocs (g0) which is 700 by default. - # This number is quite small for frappe where a single query can potentially create 700+ - # objects easily. - # Bump this number higher, this will make GC less aggressive but that improves performance of - # everything else. - g0, g1, g2 = gc.get_threshold() # defaults are 700, 10, 10. 
- gc.set_threshold(g0 * 10, g1 * 2, g2 * 2) - -# Remove references to pattern that are pre-compiled and loaded to global scopes. -re.purge() +frappe.optimizations.optimize_all() diff --git a/frappe/app.py b/frappe/app.py index 0519dc92e4..894745895d 100644 --- a/frappe/app.py +++ b/frappe/app.py @@ -35,35 +35,34 @@ _sites_path = os.environ.get("SITES_PATH", ".") # If gc.freeze is done then importing modules before forking allows us to share the memory -if frappe._tune_gc: - import gettext +import gettext - import babel - import babel.messages - import bleach - import num2words - import pydantic +import babel +import babel.messages +import bleach +import num2words +import pydantic - import frappe.boot - import frappe.client - import frappe.core.doctype.file.file - import frappe.core.doctype.user.user - import frappe.database.mariadb.database # Load database related utils - import frappe.database.query - import frappe.desk.desktop # workspace - import frappe.desk.form.save - import frappe.model.db_query - import frappe.query_builder - import frappe.utils.background_jobs # Enqueue is very common - import frappe.utils.data # common utils - import frappe.utils.jinja # web page rendering - import frappe.utils.jinja_globals - import frappe.utils.redis_wrapper # Exact redis_wrapper - import frappe.utils.safe_exec - import frappe.utils.typing_validations # any whitelisted method uses this - import frappe.website.path_resolver # all the page types and resolver - import frappe.website.router # Website router - import frappe.website.website_generator # web page doctypes +import frappe.boot +import frappe.client +import frappe.core.doctype.file.file +import frappe.core.doctype.user.user +import frappe.database.mariadb.database # Load database related utils +import frappe.database.query +import frappe.desk.desktop # workspace +import frappe.desk.form.save +import frappe.model.db_query +import frappe.query_builder +import frappe.utils.background_jobs # Enqueue is very common 
+import frappe.utils.data # common utils +import frappe.utils.jinja # web page rendering +import frappe.utils.jinja_globals +import frappe.utils.redis_wrapper # Exact redis_wrapper +import frappe.utils.safe_exec +import frappe.utils.typing_validations # any whitelisted method uses this +import frappe.website.path_resolver # all the page types and resolver +import frappe.website.router # Website router +import frappe.website.website_generator # web page doctypes # end: module pre-loading @@ -519,20 +518,3 @@ def application_with_statics(): application = StaticDataMiddleware(application, {"/files": str(os.path.abspath(_sites_path))}) return application - - -# Remove references to pattern that are pre-compiled and loaded to global scopes. -re.purge() - -# Both Gunicorn and RQ use forking to spawn workers. In an ideal world, the fork should be sharing -# most of the memory if there are no writes made to data because of Copy on Write, however, -# python's GC is not CoW friendly and writes to data even if user-code doesn't. Specifically, the -# generational GC which stores and mutates every python object: `PyGC_Head` -# -# Calling gc.freeze() moves all the objects imported so far into permanant generation and hence -# doesn't mutate `PyGC_Head` -# -# Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 -if frappe._tune_gc: - gc.collect() # clean up any garbage created so far before freeze - gc.freeze() diff --git a/frappe/optimizations.py b/frappe/optimizations.py new file mode 100644 index 0000000000..85f4381cea --- /dev/null +++ b/frappe/optimizations.py @@ -0,0 +1,81 @@ +import faulthandler +import gc +import io +import os +import re +import signal +import sys + + +def optimize_all(): + """Single entry point to enable all optimizations at right time automatically.""" + + # Note: + # - This function is ALWAYS executed as soon as `import frappe` ends. + # - Any deferred work should be deferred using os module's fork hooks. 
+ # - Respect configurations using environment variables. + # - fork hooks can not be unregistered, so care should be taken to execute them only when they + # make sense. + _optimize_regex_cache() + _optimize_gc_parameters() + _optimize_gc_for_copy_on_write() + _register_fault_handler() + os.register_at_fork(after_in_child=_register_fault_handler) + + +def _optimize_gc_parameters(): + from frappe.utils import sbool + + if not bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True))): + return + + # generational GC gets triggered after certain allocs (g0) which is 700 by default. + # This number is quite small for frappe where a single query can potentially create 700+ + # objects easily. + # Bump this number higher, this will make GC less aggressive but that improves performance of + # everything else. + g0, g1, g2 = gc.get_threshold() # defaults are 700, 10, 10. + gc.set_threshold(g0 * 10, g1 * 2, g2 * 2) + + +def _optimize_regex_cache(): + # Remove references to pattern that are pre-compiled and loaded to global scopes. + # Leave that cache for dynamically generated regex. + os.register_at_fork(before=re.purge) + + +def _register_fault_handler(): + # Some libraries monkey patch stderr, we need actual fd + if isinstance(sys.__stderr__, io.TextIOWrapper): + faulthandler.register(signal.SIGUSR1, file=sys.__stderr__) + + +def _optimize_gc_for_copy_on_write(): + from frappe.utils import sbool + + if not bool(sbool(os.environ.get("FRAPPE_TUNE_GC", True))): + return + + os.register_at_fork(before=_freeze_gc) + + +_gc_frozen = False + + +def _freeze_gc(): + global _gc_frozen + if _gc_frozen: + return + # Both Gunicorn and RQ use forking to spawn workers. In an ideal world, the fork should be sharing + # most of the memory if there are no writes made to data because of Copy on Write, however, + # python's GC is not CoW friendly and writes to data even if user-code doesn't. 
Specifically, the + # generational GC which stores and mutates every python object: `PyGC_Head` + # + # Calling gc.freeze() moves all the objects imported so far into permanent generation and hence + # doesn't mutate `PyGC_Head` + # + # Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 + gc.collect() + gc.freeze() + # RQ workers constantly fork, there's no benefit in doing this in that case. + _gc_frozen = True diff --git a/frappe/utils/background_jobs.py b/frappe/utils/background_jobs.py index 8c7627f43d..d84bc468ea 100644 --- a/frappe/utils/background_jobs.py +++ b/frappe/utils/background_jobs.py @@ -298,7 +298,6 @@ def start_worker( strategy = DequeueStrategy.DEFAULT _start_sentry() - _freeze_gc() with frappe.init_site(): # empty init is required to get redis_queue from common_site_config.json @@ -365,11 +364,8 @@ def start_worker_pool( import frappe.utils.scheduler import frappe.utils.typing_validations # any whitelisted method uses this import frappe.website.path_resolver # all the page types and resolver - # end: module pre-loading - _freeze_gc() - with frappe.init_site(): redis_connection = get_redis_conn() @@ -394,12 +390,6 @@ def start_worker_pool( pool.start(logging_level=logging_level, burst=burst) -def _freeze_gc(): - if frappe._tune_gc: - gc.collect() - gc.freeze() - - def get_worker_name(queue): """When limiting worker to a specific queue, also append queue name to default worker name""" name = None