Not all single doctypes are settings, so this is better. Implicit caching is fine, same is done for `db` APIs on singles. We *should* aim for 100% correctness of caching implementation, especially for singles. Thanks to @netchampfaris for the suggestion.
80 lines
2 KiB
Python
80 lines
2 KiB
Python
# Copyright (c) 2022, Frappe Technologies Pvt. Ltd. and Contributors
|
|
# License: MIT. See LICENSE
|
|
|
|
from urllib import robotparser
|
|
from urllib.parse import quote
|
|
|
|
import frappe
|
|
from frappe.model.document import get_controller
|
|
from frappe.utils import get_url, nowdate
|
|
from frappe.utils.caching import redis_cache
|
|
from frappe.website.router import get_doctypes_with_web_view, get_pages
|
|
|
|
no_cache = 1
|
|
base_template_path = "www/sitemap.xml"
|
|
|
|
|
|
def get_context(context):
|
|
"""generate the sitemap XML"""
|
|
links = [
|
|
{"loc": get_url(quote(page.name.encode("utf-8"))), "lastmod": nowdate()}
|
|
for route, page in get_pages().items()
|
|
if page.sitemap
|
|
]
|
|
|
|
links.extend(
|
|
{
|
|
"loc": get_url(quote((route or "").encode("utf-8"))),
|
|
"lastmod": f"{data['modified']:%Y-%m-%d}",
|
|
}
|
|
for route, data in get_public_pages_from_doctypes().items()
|
|
)
|
|
|
|
return {"links": links}
|
|
|
|
|
|
@redis_cache(ttl=6 * 60 * 60)
|
|
def get_public_pages_from_doctypes():
|
|
"""Return pages from doctypes that are publicly accessible."""
|
|
|
|
routes = {}
|
|
doctypes_with_web_view = get_doctypes_with_web_view()
|
|
|
|
robot_parser_instance = None
|
|
if robots_txt := frappe.get_single_value("Website Settings", "robots_txt"):
|
|
robot_parser_instance = robotparser.RobotFileParser()
|
|
robot_parser_instance.parse(robots_txt.splitlines())
|
|
|
|
for doctype in doctypes_with_web_view:
|
|
controller = get_controller(doctype)
|
|
meta = frappe.get_meta(doctype)
|
|
|
|
if not meta.allow_guest_to_view:
|
|
continue
|
|
|
|
condition_field = meta.is_published_field or controller.website.condition_field
|
|
|
|
if not condition_field:
|
|
continue
|
|
|
|
try:
|
|
res = frappe.get_all(
|
|
doctype,
|
|
fields=["route", "name", "modified"],
|
|
filters={condition_field: True},
|
|
)
|
|
except Exception as e:
|
|
if not frappe.db.is_missing_column(e):
|
|
raise e
|
|
|
|
for r in res:
|
|
if robot_parser_instance and not robot_parser_instance.can_fetch("*", f"/{r.route}"):
|
|
continue
|
|
|
|
routes[r.route] = {
|
|
"doctype": doctype,
|
|
"name": r.name,
|
|
"modified": r.modified,
|
|
}
|
|
|
|
return routes
|