Merge pull request #14463 from ankush/fuzzy_website_search

feat: allow fuzzy search in website search
This commit is contained in:
mergify[bot] 2021-10-25 06:57:33 +00:00 committed by GitHub
commit 92c60983cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 5 deletions

View file

@ -7,7 +7,7 @@ from frappe.utils import update_progress_bar
from whoosh.index import create_in, open_dir, EmptyIndexError
from whoosh.fields import TEXT, ID, Schema
from whoosh.qparser import MultifieldParser, FieldsPlugin, WildcardPlugin
from whoosh.query import Prefix
from whoosh.query import Prefix, FuzzyTerm
from whoosh.writing import AsyncWriter
@ -23,6 +23,9 @@ class FullTextSearch:
def get_schema(self):
    """Return the index schema.

    The schema declares two stored fields: ``name`` as an ``ID`` field and
    ``content`` as a ``TEXT`` field.
    """
    return Schema(name=ID(stored=True), content=TEXT(stored=True))
def get_fields_to_search(self):
    """Return the names of the schema fields that a search should query.

    Order matters: the search routine in this file assigns each field a
    boost of ``1.0 / position`` (1.0, 0.5, ...), so earlier fields weigh more.
    """
    return ["name", "content"]
def get_id(self):
    """Return ``"name"``, the schema field declared as ``ID`` in ``get_schema``."""
    return "name"
@ -120,8 +123,15 @@ class FullTextSearch:
results = None
out = []
search_fields = self.get_fields_to_search()
fieldboosts = {}
# apply reducing boost on fields based on order. 1.0, 0.5, 0.33 and so on
for idx, field in enumerate(search_fields, start=1):
fieldboosts[field] = 1.0 / idx
with ix.searcher() as searcher:
parser = MultifieldParser(["title", "content"], ix.schema)
parser = MultifieldParser(search_fields, ix.schema, termclass=FuzzyTermExtended, fieldboosts=fieldboosts)
parser.remove_plugin_class(FieldsPlugin)
parser.remove_plugin_class(WildcardPlugin)
query = parser.parse(text)
@ -136,5 +146,13 @@ class FullTextSearch:
return out
class FuzzyTermExtended(FuzzyTerm):
    """``FuzzyTerm`` subclass that only pins its own constructor defaults.

    It is passed as ``termclass`` to ``MultifieldParser`` in this file's
    search routine. The class adds no behaviour of its own: every argument is
    forwarded to ``FuzzyTerm.__init__`` unchanged, with ``maxdist`` defaulting
    to 2 and ``prefixlength`` to 1.
    """

    def __init__(self, fieldname, text, boost=1.0, maxdist=2, prefixlength=1,
                 constantscore=True):
        # Forward by keyword so the call does not depend on the parent's
        # positional parameter order.
        super().__init__(fieldname, text, boost=boost, maxdist=maxdist,
                         prefixlength=prefixlength, constantscore=constantscore)
def get_index_path(index_name):
    """Return the path of the index called ``index_name``.

    The path is whatever ``frappe.get_site_path`` produces for the components
    ``"indexes"`` and ``index_name``.
    """
    return frappe.get_site_path("indexes", index_name)

View file

@ -125,4 +125,4 @@ def get_documents():
deploy business applications with Rich Admin Interface. CommonSearchTerm"""
})
return docs
return docs

View file

@ -21,6 +21,9 @@ class WebsiteSearch(FullTextSearch):
title=TEXT(stored=True), path=ID(stored=True), content=TEXT(stored=True)
)
def get_fields_to_search(self):
    """Return the schema fields queried by website search: title, then content."""
    return ["title", "content"]
def get_id(self):
    """Return ``"path"``, the schema field declared as ``ID`` for website pages."""
    return "path"

View file

@ -88,13 +88,13 @@ class TestGlobalSearch(unittest.TestCase):
event = frappe.get_doc('Event', event_name)
test_subject = event.subject
results = global_search.search(test_subject)
self.assertEqual(len(results), 1)
self.assertTrue(any(r["name"] == event_name for r in results), msg="Failed to search document by exact name")
frappe.delete_doc('Event', event_name)
global_search.sync_global_search()
results = global_search.search(test_subject)
self.assertEqual(len(results), 0)
self.assertTrue(all(r["name"] != event_name for r in results), msg="Deleted documents appearing in global search.")
def test_insert_child_table(self):
frappe.db.delete("Event")