From 51ece4a920f5e34f090f6ec49a9d89dbda69f430 Mon Sep 17 00:00:00 2001 From: Faris Ansari Date: Tue, 19 Mar 2019 15:12:28 +0530 Subject: [PATCH] feat: Flip sitemap switch BREAKING CHANGE A route is added to the sitemap if no_sitemap is not set. This PR reverses this design. Because sitemap should contain publicly accessible pages and not utility pages. Also, having lots of utility pages on sitemap does more harm than good. --- frappe/website/context.py | 4 ++-- frappe/website/router.py | 25 +++++++++++++++++++++---- frappe/www/about.py | 2 ++ frappe/www/sitemap.py | 29 +++++++++++++++++++++++++++-- frappe/www/sitemap.xml | 6 +++++- 5 files changed, 57 insertions(+), 9 deletions(-) diff --git a/frappe/website/context.py b/frappe/website/context.py index d499fa6dd7..cd31398fe9 100644 --- a/frappe/website/context.py +++ b/frappe/website/context.py @@ -38,7 +38,7 @@ def update_controller_context(context, controller): if module: # get config fields - for prop in ("base_template_path", "template", "no_cache", "no_sitemap", + for prop in ("base_template_path", "template", "no_cache", "sitemap", "condition_field"): if hasattr(module, prop): context[prop] = getattr(module, prop) @@ -89,7 +89,7 @@ def build_context(context): if ret: context.update(ret) - for prop in ("no_cache", "no_sitemap"): + for prop in ("no_cache", "sitemap"): if not prop in context: context[prop] = getattr(context.doc, prop, False) diff --git a/frappe/website/router.py b/frappe/website/router.py index 7f7df4fbfd..377bd92d97 100644 --- a/frappe/website/router.py +++ b/frappe/website/router.py @@ -215,7 +215,10 @@ def get_page_info(path, app, start, basepath=None, app_path=None, fname=None): setup_source(page_info) # extract properties from HTML comments - load_properties(page_info) + load_properties_from_source(page_info) + + # extract properties from controller attributes + load_properties_from_controller(page_info) # if not page_info.title: # print('no-title-for', page_info.route) @@ -285,8 +288,8 @@ def setup_index(page_info): if os.path.exists(index_txt_path): page_info.index = open(index_txt_path, 'r').read().splitlines() -def load_properties(page_info): - '''Load properties like no_cache, title from raw''' +def load_properties_from_source(page_info): + '''Load properties like no_cache, title from source html''' if not page_info.title: page_info.title = extract_title(page_info.source, page_info.route) @@ -322,7 +325,21 @@ def load_properties(page_info): page_info.no_cache = 1 if "" in page_info.source: - page_info.no_cache = 1 + page_info.sitemap = 0 + + if "" in page_info.source: + page_info.sitemap = 1 + +def load_properties_from_controller(page_info): + if not page_info.controller: return + + module = frappe.get_module(page_info.controller) + if not module: return + + for prop in ("base_template_path", "template", "no_cache", + "sitemap", "condition_field"): + if hasattr(module, prop): + page_info[prop] = getattr(module, prop) def get_doctypes_with_web_view(): '''Return doctypes with Has Web View or set via hooks''' diff --git a/frappe/www/about.py b/frappe/www/about.py index c8b5d4ab3c..0c922a6536 100644 --- a/frappe/www/about.py +++ b/frappe/www/about.py @@ -4,6 +4,8 @@ from __future__ import unicode_literals import frappe +sitemap = 1 + def get_context(context): context.doc = frappe.get_doc("About Us Settings", "About Us Settings") diff --git a/frappe/www/sitemap.py b/frappe/www/sitemap.py index bd1d02e26d..4fc2f6957d 100644 --- a/frappe/www/sitemap.py +++ b/frappe/www/sitemap.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals +import frappe +from frappe.model.document import get_controller from frappe.utils import get_request_site_address, get_datetime, nowdate from frappe.website.router import get_pages, get_all_page_context_from_doctypes from six import iteritems @@ -17,16 +19,39 @@ def get_context(context): host = get_request_site_address() links = [] for route, page in iteritems(get_pages()): - if not page.no_sitemap: + if page.sitemap: links.append({ "loc": urljoin(host, quote(page.name.encode("utf-8"))), "lastmod": nowdate() }) - for route, data in iteritems(get_all_page_context_from_doctypes()): + for route, data in iteritems(get_public_pages_from_doctypes()): links.append({ "loc": urljoin(host, quote((route or "").encode("utf-8"))), "lastmod": get_datetime(data.get("modified")).strftime("%Y-%m-%d") }) return {"links":links} + +def get_public_pages_from_doctypes(): + '''Returns pages from doctypes that are publicly accessible''' + routes = {} + doctypes_with_web_view = [d.name for d in frappe.db.get_all('DocType', { + 'has_web_view': 1, + 'allow_guest_to_view': 1 + })] + + for doctype in doctypes_with_web_view: + controller = get_controller(doctype) + meta = frappe.get_meta(doctype) + condition_field = meta.is_published_field or controller.website.condition_field + + try: + res = frappe.db.get_all(doctype, ['route', 'name', 'modified'], { condition_field: 1 }) + for r in res: + routes[r.route] = {"doctype": doctype, "name": r.name, "modified": r.modified} + + except Exception as e: + if not frappe.db.is_missing_column(e): raise e + + return routes \ No newline at end of file diff --git a/frappe/www/sitemap.xml b/frappe/www/sitemap.xml index 64905c2f5f..1fa59c2174 100644 --- a/frappe/www/sitemap.xml +++ b/frappe/www/sitemap.xml @@ -1,5 +1,9 @@ - {% for link in links %}{{ link.loc }}{{ link.lastmod }} + {% for link in links %} + + {{ link.loc }} + {{ link.lastmod }} + {% endfor %} \ No newline at end of file