perf: optimize scheduling

- Randomize scheduling order
- Cache dormant status for 1 hour (it is checked every second, and recomputing it each time is too much work)
This commit is contained in:
Ankush Menat 2024-05-26 21:17:42 +05:30
parent 433e7281f7
commit 59bbbd7b56
3 changed files with 8 additions and 2 deletions

View file

@ -77,6 +77,7 @@ class TestScheduler(TestCase):
job_log.db_set(
"creation", add_days(_get_last_creation_timestamp("Activity Log"), 5), update_modified=False
)
schedule_jobs_based_on_activity.clear_cache()
# inactive site with recent job, don't run
self.assertFalse(

View file

@ -358,8 +358,9 @@ def start_worker_pool(
# If gc.freeze is done then importing modules before forking allows us to share the memory
import frappe.database.query # sqlparse and indirect imports
import frappe.query_builder # pypika
import frappe.utils.data # common utils
import frappe.utils # common utils
import frappe.utils.safe_exec
import frappe.utils.scheduler
import frappe.utils.typing_validations # any whitelisted method uses this
import frappe.website.path_resolver # all the page types and resolver

View file

@ -20,6 +20,7 @@ from filelock import FileLock, Timeout
import frappe
from frappe.utils import cint, get_bench_path, get_datetime, get_sites, now_datetime
from frappe.utils.background_jobs import set_niceness
from frappe.utils.caching import redis_cache
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
@ -101,7 +102,9 @@ def enqueue_events_for_site(site: str) -> None:
def enqueue_events() -> list[str] | None:
if schedule_jobs_based_on_activity():
enqueued_jobs = []
for job_type in frappe.get_all("Scheduled Job Type", filters={"stopped": 0}, fields="*"):
all_jobs = frappe.get_all("Scheduled Job Type", filters={"stopped": 0}, fields="*")
random.shuffle(all_jobs)
for job_type in all_jobs:
job_type = frappe.get_doc(doctype="Scheduled Job Type", **job_type)
try:
if job_type.enqueue():
@ -158,6 +161,7 @@ def disable_scheduler():
toggle_scheduler(False)
@redis_cache(ttl=60 * 60)
def schedule_jobs_based_on_activity(check_time=None):
"""Return True for active sites as defined by `Activity Log`.
Also return True for inactive sites once every 24 hours based on `Scheduled Job Log`."""