seitime-frappe/frappe/search/full_text_search.py
Gavin D'souza 3446026555 chore: Update header: license.txt => LICENSE
The license.txt file has been replaced with LICENSE for quite a while
now. INAL but it didn't seem accurate to say "hey, checkout license.txt
although there's no such file". Apart from this, there were
inconsistencies in the headers altogether...this change brings
consistency.
2021-09-03 12:02:59 +05:30

138 lines
No EOL
3.6 KiB
Python

# Copyright (c) 2020, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
import frappe
from frappe.utils import update_progress_bar
from whoosh.index import create_in, open_dir, EmptyIndexError
from whoosh.fields import TEXT, ID, Schema
from whoosh.qparser import MultifieldParser, FieldsPlugin, WildcardPlugin
from whoosh.query import Prefix
class FullTextSearch:
""" Frappe Wrapper for Whoosh """
def __init__(self, index_name):
self.index_name = index_name
self.index_path = get_index_path(index_name)
self.schema = self.get_schema()
self.id = self.get_id()
def get_schema(self):
return Schema(name=ID(stored=True), content=TEXT(stored=True))
def get_id(self):
return "name"
def get_items_to_index(self):
"""Get all documents to be indexed conforming to the schema"""
return []
def get_document_to_index(self):
return {}
def build(self):
""" Build search index for all documents """
self.documents = self.get_items_to_index()
self.build_index()
def update_index_by_name(self, doc_name):
"""Wraps `update_index` method, gets the document from name
and updates the index. This function changes the current user
and should only be run as administrator or in a background job.
Args:
self (object): FullTextSearch Instance
doc_name (str): name of the document to be updated
"""
document = self.get_document_to_index(doc_name)
if document:
self.update_index(document)
def remove_document_from_index(self, doc_name):
"""Remove document from search index
Args:
self (object): FullTextSearch Instance
doc_name (str): name of the document to be removed
"""
if not doc_name:
return
ix = self.get_index()
with ix.searcher():
writer = ix.writer()
writer.delete_by_term(self.id, doc_name)
writer.commit(optimize=True)
def update_index(self, document):
"""Update search index for a document
Args:
self (object): FullTextSearch Instance
document (_dict): A dictionary with title, path and content
"""
ix = self.get_index()
with ix.searcher():
writer = ix.writer()
writer.delete_by_term(self.id, document[self.id])
writer.add_document(**document)
writer.commit(optimize=True)
def get_index(self):
try:
return open_dir(self.index_path)
except EmptyIndexError:
return self.create_index()
def create_index(self):
frappe.create_folder(self.index_path)
return create_in(self.index_path, self.schema)
def build_index(self):
"""Build index for all parsed documents"""
ix = self.create_index()
writer = ix.writer()
for i, document in enumerate(self.documents):
if document:
writer.add_document(**document)
update_progress_bar("Building Index", i, len(self.documents))
writer.commit(optimize=True)
def search(self, text, scope=None, limit=20):
"""Search from the current index
Args:
text (str): String to search for
scope (str, optional): Scope to limit the search. Defaults to None.
limit (int, optional): Limit number of search results. Defaults to 20.
Returns:
[List(_dict)]: Search results
"""
ix = self.get_index()
results = None
out = []
with ix.searcher() as searcher:
parser = MultifieldParser(["title", "content"], ix.schema)
parser.remove_plugin_class(FieldsPlugin)
parser.remove_plugin_class(WildcardPlugin)
query = parser.parse(text)
filter_scoped = None
if scope:
filter_scoped = Prefix(self.id, scope)
results = searcher.search(query, limit=limit, filter=filter_scoped)
for r in results:
out.append(self.parse_result(r))
return out
def get_index_path(index_name):
return frappe.get_site_path("indexes", index_name)