diff --git a/frappe/search/full_text_search.py b/frappe/search/full_text_search.py index 560ad55bf3..1d4f3fef32 100644 --- a/frappe/search/full_text_search.py +++ b/frappe/search/full_text_search.py @@ -7,7 +7,7 @@ from frappe.utils import update_progress_bar from whoosh.index import create_in, open_dir, EmptyIndexError from whoosh.fields import TEXT, ID, Schema from whoosh.qparser import MultifieldParser, FieldsPlugin, WildcardPlugin -from whoosh.query import Prefix +from whoosh.query import Prefix, FuzzyTerm from whoosh.writing import AsyncWriter @@ -23,6 +23,9 @@ class FullTextSearch: def get_schema(self): return Schema(name=ID(stored=True), content=TEXT(stored=True)) + def get_fields_to_search(self): + return ["name", "content"] + def get_id(self): return "name" @@ -120,8 +123,15 @@ class FullTextSearch: results = None out = [] + search_fields = self.get_fields_to_search() + fieldboosts = {} + + # apply reducing boost on fields based on order. 1.0, 0.5, 0.33 and so on + for idx, field in enumerate(search_fields, start=1): + fieldboosts[field] = 1.0 / idx + with ix.searcher() as searcher: - parser = MultifieldParser(["title", "content"], ix.schema) + parser = MultifieldParser(search_fields, ix.schema, termclass=FuzzyTermExtended, fieldboosts=fieldboosts) parser.remove_plugin_class(FieldsPlugin) parser.remove_plugin_class(WildcardPlugin) query = parser.parse(text) @@ -136,5 +146,13 @@ class FullTextSearch: return out + +class FuzzyTermExtended(FuzzyTerm): + def __init__(self, fieldname, text, boost=1.0, maxdist=2, prefixlength=1, + constantscore=True): + super().__init__(fieldname, text, boost=boost, maxdist=maxdist, + prefixlength=prefixlength, constantscore=constantscore) + + def get_index_path(index_name): return frappe.get_site_path("indexes", index_name) diff --git a/frappe/search/test_full_text_search.py b/frappe/search/test_full_text_search.py index 348a0ec72a..0dbc7e775b 100644 --- a/frappe/search/test_full_text_search.py +++ b/frappe/search/test_full_text_search.py @@ -125,4 +125,4 @@ def get_documents(): deploy business applications with Rich Admin Interface. CommonSearchTerm""" }) - return docs \ No newline at end of file + return docs diff --git a/frappe/search/website_search.py b/frappe/search/website_search.py index 0bc06d1a9b..30eadae6f1 100644 --- a/frappe/search/website_search.py +++ b/frappe/search/website_search.py @@ -21,6 +21,9 @@ class WebsiteSearch(FullTextSearch): title=TEXT(stored=True), path=ID(stored=True), content=TEXT(stored=True) ) + def get_fields_to_search(self): + return ["title", "content"] + def get_id(self): return "path" diff --git a/frappe/tests/test_global_search.py b/frappe/tests/test_global_search.py index 41d6427b77..9a86baa4e5 100644 --- a/frappe/tests/test_global_search.py +++ b/frappe/tests/test_global_search.py @@ -88,13 +88,13 @@ class TestGlobalSearch(unittest.TestCase): event = frappe.get_doc('Event', event_name) test_subject = event.subject results = global_search.search(test_subject) - self.assertEqual(len(results), 1) + self.assertTrue(any(r["name"] == event_name for r in results), msg="Failed to search document by exact name") frappe.delete_doc('Event', event_name) global_search.sync_global_search() results = global_search.search(test_subject) - self.assertEqual(len(results), 0) + self.assertTrue(all(r["name"] != event_name for r in results), msg="Deleted documents appearing in global search.") def test_insert_child_table(self): frappe.db.delete("Event")