feat: Flip sitemap switch

BREAKING CHANGE
A route is added to the sitemap if no_sitemap is not set.
This PR reverses this design. Because sitemap should contain publicly accessible pages and not utility pages.
Also, having lots of utility pages on sitemap does more harm than good.
This commit is contained in:
Faris Ansari 2019-03-19 15:12:28 +05:30
parent 53ab51a73d
commit 51ece4a920
5 changed files with 57 additions and 9 deletions

View file

@ -38,7 +38,7 @@ def update_controller_context(context, controller):
if module:
# get config fields
for prop in ("base_template_path", "template", "no_cache", "no_sitemap",
for prop in ("base_template_path", "template", "no_cache", "sitemap",
"condition_field"):
if hasattr(module, prop):
context[prop] = getattr(module, prop)
@ -89,7 +89,7 @@ def build_context(context):
if ret:
context.update(ret)
for prop in ("no_cache", "no_sitemap"):
for prop in ("no_cache", "sitemap"):
if not prop in context:
context[prop] = getattr(context.doc, prop, False)

View file

@ -215,7 +215,10 @@ def get_page_info(path, app, start, basepath=None, app_path=None, fname=None):
setup_source(page_info)
# extract properties from HTML comments
load_properties(page_info)
load_properties_from_source(page_info)
# extract properties from controller attributes
load_properties_from_controller(page_info)
# if not page_info.title:
# print('no-title-for', page_info.route)
@ -285,8 +288,8 @@ def setup_index(page_info):
if os.path.exists(index_txt_path):
page_info.index = open(index_txt_path, 'r').read().splitlines()
def load_properties(page_info):
'''Load properties like no_cache, title from raw'''
def load_properties_from_source(page_info):
'''Load properties like no_cache, title from source html'''
if not page_info.title:
page_info.title = extract_title(page_info.source, page_info.route)
@ -322,7 +325,21 @@ def load_properties(page_info):
page_info.no_cache = 1
if "<!-- no-sitemap -->" in page_info.source:
page_info.no_cache = 1
page_info.sitemap = 0
if "<!-- sitemap -->" in page_info.source:
page_info.sitemap = 1
def load_properties_from_controller(page_info):
if not page_info.controller: return
module = frappe.get_module(page_info.controller)
if not module: return
for prop in ("base_template_path", "template", "no_cache",
"sitemap", "condition_field"):
if hasattr(module, prop):
page_info[prop] = getattr(module, prop)
def get_doctypes_with_web_view():
'''Return doctypes with Has Web View or set via hooks'''

View file

@ -4,6 +4,8 @@
from __future__ import unicode_literals
import frappe
sitemap = 1
def get_context(context):
context.doc = frappe.get_doc("About Us Settings", "About Us Settings")

View file

@ -3,6 +3,8 @@
from __future__ import unicode_literals
import frappe
from frappe.model.document import get_controller
from frappe.utils import get_request_site_address, get_datetime, nowdate
from frappe.website.router import get_pages, get_all_page_context_from_doctypes
from six import iteritems
@ -17,16 +19,39 @@ def get_context(context):
host = get_request_site_address()
links = []
for route, page in iteritems(get_pages()):
if not page.no_sitemap:
if page.sitemap:
links.append({
"loc": urljoin(host, quote(page.name.encode("utf-8"))),
"lastmod": nowdate()
})
for route, data in iteritems(get_all_page_context_from_doctypes()):
for route, data in iteritems(get_public_pages_from_doctypes()):
links.append({
"loc": urljoin(host, quote((route or "").encode("utf-8"))),
"lastmod": get_datetime(data.get("modified")).strftime("%Y-%m-%d")
})
return {"links":links}
def get_public_pages_from_doctypes():
'''Returns pages from doctypes that are publicly accessible'''
routes = {}
doctypes_with_web_view = [d.name for d in frappe.db.get_all('DocType', {
'has_web_view': 1,
'allow_guest_to_view': 1
})]
for doctype in doctypes_with_web_view:
controller = get_controller(doctype)
meta = frappe.get_meta(doctype)
condition_field = meta.is_published_field or controller.website.condition_field
try:
res = frappe.db.get_all(doctype, ['route', 'name', 'modified'], { condition_field: 1 })
for r in res:
routes[r.route] = {"doctype": doctype, "name": r.name, "modified": r.modified}
except Exception as e:
if not frappe.db.is_missing_column(e): raise e
return routes

View file

@ -1,5 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for link in links %}<url><loc>{{ link.loc }}</loc><lastmod>{{ link.lastmod }}</lastmod></url>
{% for link in links %}
<url>
<loc>{{ link.loc }}</loc>
<lastmod>{{ link.lastmod }}</lastmod>
</url>
{% endfor %}
</urlset>