From cbe673255bb1900a6a6c4900f9810d8e0f269839 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Thu, 14 Oct 2021 22:38:34 +0530 Subject: [PATCH 1/4] fix(ux): allow fuzzy search in website search FuzzyTerm allows edit distance based fuzzy searching. This means "webho" or "wabhool" will match "webhook". --- frappe/search/full_text_search.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/frappe/search/full_text_search.py b/frappe/search/full_text_search.py index 560ad55bf3..ebe9390c38 100644 --- a/frappe/search/full_text_search.py +++ b/frappe/search/full_text_search.py @@ -7,7 +7,7 @@ from frappe.utils import update_progress_bar from whoosh.index import create_in, open_dir, EmptyIndexError from whoosh.fields import TEXT, ID, Schema from whoosh.qparser import MultifieldParser, FieldsPlugin, WildcardPlugin -from whoosh.query import Prefix +from whoosh.query import Prefix, FuzzyTerm from whoosh.writing import AsyncWriter @@ -121,7 +121,7 @@ class FullTextSearch: out = [] with ix.searcher() as searcher: - parser = MultifieldParser(["title", "content"], ix.schema) + parser = MultifieldParser(["title", "content"], ix.schema, termclass=FuzzyTermExtended) parser.remove_plugin_class(FieldsPlugin) parser.remove_plugin_class(WildcardPlugin) query = parser.parse(text) @@ -136,5 +136,13 @@ class FullTextSearch: return out + +class FuzzyTermExtended(FuzzyTerm): + def __init__(self, fieldname, text, boost=1.0, maxdist=2, prefixlength=1, + constantscore=True): + super().__init__(fieldname, text, boost=boost, maxdist=maxdist, + prefixlength=prefixlength, constantscore=constantscore) + + def get_index_path(index_name): return frappe.get_site_path("indexes", index_name) From 56f5b3d1f32af727e574159be4e325ff663cd32d Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Tue, 19 Oct 2021 16:40:30 +0530 Subject: [PATCH 2/4] test: make global search tests less flaky --- frappe/search/test_full_text_search.py | 2 +- frappe/tests/test_global_search.py | 4 ++-- 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/frappe/search/test_full_text_search.py b/frappe/search/test_full_text_search.py index 348a0ec72a..0dbc7e775b 100644 --- a/frappe/search/test_full_text_search.py +++ b/frappe/search/test_full_text_search.py @@ -125,4 +125,4 @@ def get_documents(): deploy business applications with Rich Admin Interface. CommonSearchTerm""" }) - return docs \ No newline at end of file + return docs diff --git a/frappe/tests/test_global_search.py b/frappe/tests/test_global_search.py index 41d6427b77..9a86baa4e5 100644 --- a/frappe/tests/test_global_search.py +++ b/frappe/tests/test_global_search.py @@ -88,13 +88,13 @@ class TestGlobalSearch(unittest.TestCase): event = frappe.get_doc('Event', event_name) test_subject = event.subject results = global_search.search(test_subject) - self.assertEqual(len(results), 1) + self.assertTrue(any(r["name"] == event_name for r in results), msg="Failed to search document by exact name") frappe.delete_doc('Event', event_name) global_search.sync_global_search() results = global_search.search(test_subject) - self.assertEqual(len(results), 0) + self.assertTrue(all(r["name"] != event_name for r in results), msg="Deleted documents appearing in global search.") def test_insert_child_table(self): frappe.db.delete("Event") From 4ef4ecdf01945a0f38c77dd98a525bc35079f03e Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sun, 24 Oct 2021 15:39:50 +0530 Subject: [PATCH 3/4] fix: don't hardcode search fields FullTextSearch uses name and content; WebsiteSearch uses title and content. Tests were failing because of hardcoded fieldnames which can't be overridden by an inheriting class without rewriting the search function. Made a separate function for defining search fields. 
--- frappe/search/full_text_search.py | 5 ++++- frappe/search/website_search.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/frappe/search/full_text_search.py b/frappe/search/full_text_search.py index ebe9390c38..23759ab785 100644 --- a/frappe/search/full_text_search.py +++ b/frappe/search/full_text_search.py @@ -23,6 +23,9 @@ class FullTextSearch: def get_schema(self): return Schema(name=ID(stored=True), content=TEXT(stored=True)) + def get_fields_to_search(self): + return ["name", "content"] + def get_id(self): return "name" @@ -121,7 +124,7 @@ class FullTextSearch: out = [] with ix.searcher() as searcher: - parser = MultifieldParser(["title", "content"], ix.schema, termclass=FuzzyTermExtended) + parser = MultifieldParser(self.get_fields_to_search(), ix.schema, termclass=FuzzyTermExtended) parser.remove_plugin_class(FieldsPlugin) parser.remove_plugin_class(WildcardPlugin) query = parser.parse(text) diff --git a/frappe/search/website_search.py b/frappe/search/website_search.py index 0bc06d1a9b..30eadae6f1 100644 --- a/frappe/search/website_search.py +++ b/frappe/search/website_search.py @@ -21,6 +21,9 @@ class WebsiteSearch(FullTextSearch): title=TEXT(stored=True), path=ID(stored=True), content=TEXT(stored=True) ) + def get_fields_to_search(self): + return ["title", "content"] + def get_id(self): return "path" From 99a6874affd1fd61ae093be3f6f72920d317babc Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Sun, 24 Oct 2021 15:58:56 +0530 Subject: [PATCH 4/4] feat: apply boosts on search queries This gives matches in the title (or name) higher priority than matches in the content. 
--- frappe/search/full_text_search.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/frappe/search/full_text_search.py b/frappe/search/full_text_search.py index 23759ab785..1d4f3fef32 100644 --- a/frappe/search/full_text_search.py +++ b/frappe/search/full_text_search.py @@ -123,8 +123,15 @@ class FullTextSearch: results = None out = [] + search_fields = self.get_fields_to_search() + fieldboosts = {} + + # apply reducing boost on fields based on order. 1.0, 0.5, 0.33 and so on + for idx, field in enumerate(search_fields, start=1): + fieldboosts[field] = 1.0 / idx + with ix.searcher() as searcher: - parser = MultifieldParser(self.get_fields_to_search(), ix.schema, termclass=FuzzyTermExtended) + parser = MultifieldParser(search_fields, ix.schema, termclass=FuzzyTermExtended, fieldboosts=fieldboosts) parser.remove_plugin_class(FieldsPlugin) parser.remove_plugin_class(WildcardPlugin) query = parser.parse(text)