From 423e78132657b7731d272ed22903c57e061de6bf Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 14:29:55 +0530 Subject: [PATCH 1/7] perf: Freeze GC before forking Gunicorn workers --- frappe/app.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/frappe/app.py b/frappe/app.py index ddde313ace..6883b42c51 100644 --- a/frappe/app.py +++ b/frappe/app.py @@ -1,6 +1,7 @@ # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors # License: MIT. See LICENSE +import gc import logging import os @@ -394,3 +395,15 @@ def serve( use_evalex=not in_test_env, threaded=not no_threading, ) + + +# Both Gunicorn and RQ use forking to spawn workers. In an ideal world, the fork should be sharing +# most of the memory if there are no writes made to data because of Copy on Write, however, +# python's GC is not CoW friendly and writes to data even if user-code doesn't. Specifically, the +# generational GC which stores and mutates every python object: `PyGC_Head` +# +# Calling gc.freeze() moves all the objects imported so far into permanant generation and hence +# doesn't mutate `PyGC_Head` +# +# Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 +gc.freeze() From cc43af984ed82976a681641cdad96e309af66976 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 16:05:45 +0530 Subject: [PATCH 2/7] fix: Only import LDAP if it's enabled This is disabled on 99%+ sites, still incurs 3-4MB of memory hit on import of ldap3. --- frappe/www/login.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frappe/www/login.py b/frappe/www/login.py index 0aa1eba0cd..7cf89e7c3c 100644 --- a/frappe/www/login.py +++ b/frappe/www/login.py @@ -5,7 +5,6 @@ import frappe import frappe.utils from frappe import _ from frappe.auth import LoginManager -from frappe.integrations.doctype.ldap_settings.ldap_settings import LDAPSettings from frappe.rate_limiter import rate_limit from frappe.utils import cint, get_url from frappe.utils.data import escape_html @@ -85,7 +84,10 @@ def get_context(context): ) context["social_login"] = True - context["ldap_settings"] = LDAPSettings.get_ldap_client_settings() + if cint(frappe.db.get_value("LDAP Settings", "LDAP Settings", "enabled")): + from frappe.integrations.doctype.ldap_settings.ldap_settings import LDAPSettings + + context["ldap_settings"] = LDAPSettings.get_ldap_client_settings() login_label = [_("Email")] From 32bd5d3e2c5fb4970293e3247e1f8d919ab1c34d Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 16:26:22 +0530 Subject: [PATCH 3/7] Revert "perf: defer many requests imports" This reverts commit 71b44efcac4af3c7381ebe817694939e03bef8a4. This gets frequently imported from one place or another. Since with gc.freeze we can mostly reuse the import from parent, let's just leave it here. --- frappe/core/doctype/file/utils.py | 5 ++--- .../doctype/slack_webhook_url/slack_webhook_url.py | 4 ++-- frappe/integrations/doctype/webhook/webhook.py | 10 +++------- frappe/integrations/google_oauth.py | 12 ++++-------- frappe/utils/csvutils.py | 4 ++-- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/frappe/core/doctype/file/utils.py b/frappe/core/doctype/file/utils.py index 7b44e94271..1d0d145303 100644 --- a/frappe/core/doctype/file/utils.py +++ b/frappe/core/doctype/file/utils.py @@ -7,6 +7,8 @@ from typing import TYPE_CHECKING, Optional from urllib.parse import unquote import filetype +import requests +import requests.exceptions from PIL import Image import frappe @@ -114,9 +116,6 @@ def get_local_image(file_url: str) -> tuple["ImageFile", str, str]: def get_web_image(file_url: str) -> tuple["ImageFile", str, str]: - import requests - import requests.exceptions - # download file_url = frappe.utils.get_url(file_url) r = requests.get(file_url, stream=True) diff --git a/frappe/integrations/doctype/slack_webhook_url/slack_webhook_url.py b/frappe/integrations/doctype/slack_webhook_url/slack_webhook_url.py index 4ea11aaccb..d71d7075a6 100644 --- a/frappe/integrations/doctype/slack_webhook_url/slack_webhook_url.py +++ b/frappe/integrations/doctype/slack_webhook_url/slack_webhook_url.py @@ -3,6 +3,8 @@ import json +import requests + import frappe from frappe import _ from frappe.model.document import Document @@ -22,8 +24,6 @@ class SlackWebhookURL(Document): def send_slack_message(webhook_url, message, reference_doctype, reference_name): - import requests - data = {"text": message, "attachments": []} slack_url, show_link = frappe.db.get_value( diff --git a/frappe/integrations/doctype/webhook/webhook.py b/frappe/integrations/doctype/webhook/webhook.py index 9e96870857..6fa24bfb67 100644 --- a/frappe/integrations/doctype/webhook/webhook.py +++ b/frappe/integrations/doctype/webhook/webhook.py @@ -5,10 +5,11 @@ import base64 import hashlib import hmac import json -import typing from time import sleep from urllib.parse import urlparse +import requests + import frappe from frappe import _ from frappe.model.document import Document @@ -17,9 +18,6 @@ from frappe.utils.safe_exec import get_safe_globals WEBHOOK_SECRET_HEADER = "X-Frappe-Webhook-Signature" -if typing.TYPE_CHECKING: - import requests - class Webhook(Document): def validate(self): @@ -114,8 +112,6 @@ def get_context(doc): def enqueue_webhook(doc, webhook) -> None: - import requests - webhook: Webhook = frappe.get_doc("Webhook", webhook.get("name")) headers = get_webhook_headers(doc, webhook) data = get_webhook_data(doc, webhook) @@ -158,7 +154,7 @@ def log_request( url: str, headers: dict, data: dict, - res: typing.Optional["requests.Response"] = None, + res: requests.Response | None = None, ): request_log = frappe.get_doc( { diff --git a/frappe/integrations/google_oauth.py b/frappe/integrations/google_oauth.py index 4a6f108150..8bc54e0b1d 100644 --- a/frappe/integrations/google_oauth.py +++ b/frappe/integrations/google_oauth.py @@ -2,6 +2,7 @@ import json from google.oauth2.credentials import Credentials from googleapiclient.discovery import build +from requests import get, post import frappe from frappe.utils import get_request_site_address @@ -55,8 +56,6 @@ class GoogleOAuth: frappe.throw(frappe._("Please update {} before continuing.").format(google_settings)) def authorize(self, oauth_code: str) -> dict[str, str | int]: - import requests - """Returns a dict with access and refresh token. :param oauth_code: code got back from google upon successful auhtorization @@ -74,14 +73,13 @@ class GoogleOAuth: } return handle_response( - requests.post(self.OAUTH_URL, data=data).json(), + post(self.OAUTH_URL, data=data).json(), "Google Oauth Authorization Error", "Something went wrong during the authorization.", ) def refresh_access_token(self, refresh_token: str) -> dict[str, str | int]: """Refreshes google access token using refresh token""" - import requests data = { "client_id": self.google_settings.client_id, @@ -94,7 +92,7 @@ class GoogleOAuth: } return handle_response( - requests.post(self.OAUTH_URL, data=data).json(), + post(self.OAUTH_URL, data=data).json(), "Google Oauth Access Token Refresh Error", "Something went wrong during the access token generation.", raise_err=True, @@ -160,9 +158,7 @@ def handle_response( def is_valid_access_token(access_token: str) -> bool: - import requests - - response = requests.get( + response = get( "https://oauth2.googleapis.com/tokeninfo", params={"access_token": access_token} ).json() diff --git a/frappe/utils/csvutils.py b/frappe/utils/csvutils.py index 86a0e9776f..4840c806bb 100644 --- a/frappe/utils/csvutils.py +++ b/frappe/utils/csvutils.py @@ -4,6 +4,8 @@ import csv import json from io import StringIO +import requests + import frappe from frappe import _, msgprint from frappe.utils import cint, comma_or, cstr, flt @@ -176,8 +178,6 @@ def getlink(doctype, name): def get_csv_content_from_google_sheets(url): - import requests - # https://docs.google.com/spreadsheets/d/{sheetid}}/edit#gid={gid} validate_google_sheets_url(url) # get gid, defaults to first sheet From 4f0a2e6e9c293417be9e17711d1c4985db6c684b Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 16:33:32 +0530 Subject: [PATCH 4/7] fix: move gc.freeze behind environ variable --- frappe/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frappe/app.py b/frappe/app.py index 6883b42c51..af3cb89bfa 100644 --- a/frappe/app.py +++ b/frappe/app.py @@ -406,4 +406,5 @@ def serve( # doesn't mutate `PyGC_Head` # # Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 -gc.freeze() +if bool(os.environ.get("FRAPPE_TUNE_GC", False)): + gc.freeze() From fbe3174914e21dffa6dc403cdec2026ed8119076 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 16:40:49 +0530 Subject: [PATCH 5/7] perf: Bump alloc count to 7,000 for generation 0 This has overall 1-2% CPU usage reduction for little to no costs. Benefits increase when doing bulk processing with lots of objects. --- frappe/__init__.py | 12 ++++++++++++ frappe/app.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/frappe/__init__.py b/frappe/__init__.py index a4ac8111dc..998d881a13 100644 --- a/frappe/__init__.py +++ b/frappe/__init__.py @@ -11,6 +11,7 @@ be used to build database driven apps. Read the documentation: https://frappeframework.com/docs """ import functools +import gc import importlib import inspect import json @@ -57,6 +58,7 @@ re._MAXCACHE = ( 50 # reduced from default 512 given we are already maintaining this on parent worker ) +_tune_gc = bool(os.environ.get("FRAPPE_TUNE_GC", False)) if _dev_server: warnings.simplefilter("always", DeprecationWarning) @@ -2435,3 +2437,13 @@ def validate_and_sanitize_search_inputs(fn): return fn(**kwargs) return wrapper + + +if _tune_gc: + # generational GC gets triggered after certain allocs (g0) which is 700 by default. + # This number is quite small for frappe where a single query can potentially create 700+ + # objects easily. + # Bump this number higher, this will make GC less aggressive but that improves performance of + # everything else. + g0, g1, g2 = gc.get_threshold() # defaults are 700, 10, 10. + gc.set_threshold(g0 * 10, g1 * 2, g2 * 2) diff --git a/frappe/app.py b/frappe/app.py index af3cb89bfa..e031d6e84b 100644 --- a/frappe/app.py +++ b/frappe/app.py @@ -406,5 +406,5 @@ def serve( # doesn't mutate `PyGC_Head` # # Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 -if bool(os.environ.get("FRAPPE_TUNE_GC", False)): +if frappe._tune_gc: gc.freeze() From 29d28a460f1e7bb14595e827b43c0ba9c57b8fe9 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 17:06:23 +0530 Subject: [PATCH 6/7] perf: Freeze GC right before starting background worker BG worker forks are not CoW friendly. Freezing right before we start worker should lessen overall memory usage. Though this isn't useful much because at max you're sharing with 2 processes - master and horse. WorkerPool can improve this benefit a lot by forking each worker from master process and horse from forked processes. TBD when WorkerPool is out of beta. --- frappe/utils/background_jobs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/frappe/utils/background_jobs.py b/frappe/utils/background_jobs.py index ea3ea4c191..9b2ff329df 100755 --- a/frappe/utils/background_jobs.py +++ b/frappe/utils/background_jobs.py @@ -1,3 +1,4 @@ +import gc import os import socket import time @@ -234,6 +235,9 @@ def start_worker( """Wrapper to start rq worker. Connects to redis and monitors these queues.""" DEQUEUE_STRATEGIES = {"round_robin": RoundRobinWorker, "random": RandomWorker} + if frappe._tune_gc: + gc.freeze() + with frappe.init_site(): # empty init is required to get redis_queue from common_site_config.json redis_connection = get_redis_conn(username=rq_username, password=rq_password) From 150c36c74d99e533673c1725f6e766ecf266bcf6 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sat, 24 Jun 2023 17:36:10 +0530 Subject: [PATCH 7/7] fix: collect before freezing --- frappe/app.py | 1 + frappe/utils/background_jobs.py | 1 + 2 files changed, 2 insertions(+) diff --git a/frappe/app.py b/frappe/app.py index e031d6e84b..a698331e8e 100644 --- a/frappe/app.py +++ b/frappe/app.py @@ -407,4 +407,5 @@ def serve( # # Refer to issue for more info: https://github.com/frappe/frappe/issues/18927 if frappe._tune_gc: + gc.collect() # clean up any garbage created so far before freeze gc.freeze() diff --git a/frappe/utils/background_jobs.py b/frappe/utils/background_jobs.py index 9b2ff329df..6b0249e720 100755 --- a/frappe/utils/background_jobs.py +++ b/frappe/utils/background_jobs.py @@ -236,6 +236,7 @@ def start_worker( DEQUEUE_STRATEGIES = {"round_robin": RoundRobinWorker, "random": RandomWorker} if frappe._tune_gc: + gc.collect() gc.freeze() with frappe.init_site():