fix: Make sure sitemap respects robots.txt
This commit is contained in:
parent
d1f308aad3
commit
2bf82e80ba
3 changed files with 16 additions and 9 deletions
|
|
@ -39,7 +39,6 @@ global_cache_keys = (
|
|||
"domain_restricted_doctypes",
|
||||
"domain_restricted_pages",
|
||||
"information_schema:counts",
|
||||
"sitemap_routes",
|
||||
"db_tables",
|
||||
"server_script_autocompletion_items",
|
||||
) + doctype_map_keys
|
||||
|
|
|
|||
|
|
@ -367,7 +367,6 @@ def clear_cache(path=None):
|
|||
"website_generator_routes",
|
||||
"website_pages",
|
||||
"website_full_index",
|
||||
"sitemap_routes",
|
||||
"languages_with_name",
|
||||
"languages",
|
||||
):
|
||||
|
|
|
|||
|
|
@ -1,12 +1,14 @@
|
|||
# Copyright (c) 2022, Frappe Technologies Pvt. Ltd. and Contributors
|
||||
# License: MIT. See LICENSE
|
||||
|
||||
from urllib import robotparser
|
||||
from urllib.parse import quote
|
||||
|
||||
import frappe
|
||||
from frappe.model.document import get_controller
|
||||
from frappe.utils import get_url, nowdate
|
||||
from frappe.website.router import get_pages
|
||||
from frappe.utils.caching import redis_cache
|
||||
from frappe.website.router import get_doctypes_with_web_view, get_pages
|
||||
|
||||
no_cache = 1
|
||||
base_template_path = "www/sitemap.xml"
|
||||
|
|
@ -31,20 +33,24 @@ def get_context(context):
|
|||
return {"links": links}
|
||||
|
||||
|
||||
@redis_cache()
|
||||
def get_public_pages_from_doctypes():
|
||||
"""Return pages from doctypes that are publicly accessible."""
|
||||
|
||||
def get_sitemap_routes():
|
||||
routes = {}
|
||||
doctypes_with_web_view = frappe.get_all(
|
||||
"DocType",
|
||||
filters={"has_web_view": True, "allow_guest_to_view": True},
|
||||
pluck="name",
|
||||
)
|
||||
doctypes_with_web_view = get_doctypes_with_web_view()
|
||||
rp = None
|
||||
if robots_txt := frappe.db.get_single_value("Website Settings", "robots_txt"):
|
||||
rp = robotparser.RobotFileParser()
|
||||
rp.parse(robots_txt.splitlines())
|
||||
|
||||
for doctype in doctypes_with_web_view:
|
||||
controller = get_controller(doctype)
|
||||
meta = frappe.get_meta(doctype)
|
||||
if not meta.allow_guest_to_view:
|
||||
continue
|
||||
|
||||
condition_field = meta.is_published_field or controller.website.condition_field
|
||||
|
||||
if not condition_field:
|
||||
|
|
@ -61,6 +67,9 @@ def get_public_pages_from_doctypes():
|
|||
raise e
|
||||
|
||||
for r in res:
|
||||
if rp and not rp.can_fetch("*", f"/{r.route}"):
|
||||
continue
|
||||
|
||||
routes[r.route] = {
|
||||
"doctype": doctype,
|
||||
"name": r.name,
|
||||
|
|
@ -69,4 +78,4 @@ def get_public_pages_from_doctypes():
|
|||
|
||||
return routes
|
||||
|
||||
return frappe.cache.get_value("sitemap_routes", get_sitemap_routes)
|
||||
return get_sitemap_routes()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue