feat: SQLite FTS5 search framework for Frappe apps (#33359)
- Abstract SQLiteSearch base class with full-text search - Spelling correction, recency boosting, and custom scoring - Supports search filtering and configurable document indexing - hooks for auto-indexing - build index after migrate - build index (if not exists) every 15 mins - update doc index on_update - remove doc index on_trash
This commit is contained in:
parent
761751f269
commit
2676c9c2ec
5 changed files with 2402 additions and 1 deletions
|
|
@ -154,6 +154,7 @@ doc_events = {
|
|||
"frappe.automation.doctype.assignment_rule.assignment_rule.update_due_date",
|
||||
"frappe.core.doctype.user_type.user_type.apply_permissions_for_non_standard_user_type",
|
||||
"frappe.core.doctype.permission_log.permission_log.make_perm_log",
|
||||
"frappe.search.sqlite_search.update_doc_index",
|
||||
],
|
||||
"after_rename": "frappe.desk.notifications.clear_doctype_notifications",
|
||||
"on_cancel": [
|
||||
|
|
@ -164,6 +165,7 @@ doc_events = {
|
|||
"on_trash": [
|
||||
"frappe.desk.notifications.clear_doctype_notifications",
|
||||
"frappe.workflow.doctype.workflow_action.workflow_action.process_workflow_actions",
|
||||
"frappe.search.sqlite_search.delete_doc_index",
|
||||
],
|
||||
"on_update_after_submit": [
|
||||
"frappe.workflow.doctype.workflow_action.workflow_action.process_workflow_actions",
|
||||
|
|
@ -206,6 +208,7 @@ scheduler_events = {
|
|||
"frappe.deferred_insert.save_to_db",
|
||||
"frappe.automation.doctype.reminder.reminder.send_reminders",
|
||||
"frappe.model.utils.link_count.update_link_count",
|
||||
"frappe.search.sqlite_search.build_index_if_not_exists",
|
||||
],
|
||||
# 10 minutes
|
||||
"0/10 * * * *": [
|
||||
|
|
@ -278,7 +281,10 @@ setup_wizard_exception = [
|
|||
]
|
||||
|
||||
before_migrate = ["frappe.core.doctype.patch_log.patch_log.before_migrate"]
|
||||
after_migrate = ["frappe.website.doctype.website_theme.website_theme.after_migrate"]
|
||||
after_migrate = [
|
||||
"frappe.website.doctype.website_theme.website_theme.after_migrate",
|
||||
"frappe.search.sqlite_search.build_index_in_background",
|
||||
]
|
||||
|
||||
otp_methods = ["OTP App", "Email", "SMS"]
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
import frappe
|
||||
from frappe.search.full_text_search import FullTextSearch
|
||||
from frappe.search.sqlite_search import SQLiteSearch
|
||||
from frappe.search.website_search import WebsiteSearch
|
||||
from frappe.utils import cint
|
||||
|
||||
|
|
|
|||
470
frappe/search/sqlite_search.md
Normal file
470
frappe/search/sqlite_search.md
Normal file
|
|
@ -0,0 +1,470 @@
|
|||
# SQLite Search Framework
|
||||
|
||||
SQLite Search is a full-text search framework for Frappe applications that provides advanced search capabilities using SQLite's FTS5 (Full-Text Search) engine. It offers features like spelling correction, time-based recency scoring, custom ranking, permission-aware filtering, and extensible scoring pipelines.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Quick Start](#quick-start)
|
||||
- [How It Works](#how-it-works)
|
||||
- [Configuration](#configuration)
|
||||
- [Features & Customization](#features--customization)
|
||||
- [API Reference](#api-reference)
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Create a Search Class
|
||||
|
||||
Create a search implementation by extending `SQLiteSearch`:
|
||||
|
||||
```python
|
||||
# my_app/search.py
|
||||
from frappe.search.sqlite_search import SQLiteSearch
|
||||
|
||||
class MyAppSearch(SQLiteSearch):
|
||||
# Database file name
|
||||
INDEX_NAME = "my_app_search.db"
|
||||
|
||||
# Define the search schema
|
||||
INDEX_SCHEMA = {
|
||||
"metadata_fields": ["project", "owner", "status"],
|
||||
"tokenizer": "unicode61 remove_diacritics 2 tokenchars '-_'",
|
||||
}
|
||||
|
||||
# Define which doctypes to index and their field mappings
|
||||
INDEXABLE_DOCTYPES = {
|
||||
"Task": {
|
||||
"fields": ["name", {"title": "subject"}, {"content": "description"}, "modified", "project", "owner", "status"],
|
||||
},
|
||||
"Issue": {
|
||||
"fields": ["name", "title", "description", {"modified": "last_updated"}, "project", "owner"],
|
||||
"filters": {"status": ("!=", "Closed")}, # Only index non-closed issues
|
||||
},
|
||||
}
|
||||
|
||||
def get_search_filters(self):
|
||||
"""Return permission filters for current user"""
|
||||
# Get projects accessible to current user
|
||||
accessible_projects = frappe.get_all(
|
||||
"Project",
|
||||
filters={"owner": frappe.session.user},
|
||||
pluck="name"
|
||||
)
|
||||
|
||||
if not accessible_projects:
|
||||
return {"project": []} # No access
|
||||
|
||||
return {"project": accessible_projects}
|
||||
```
|
||||
|
||||
### 2. Register the Search Class
|
||||
|
||||
Add your search class to hooks.py:
|
||||
|
||||
```python
|
||||
# my_app/hooks.py
|
||||
sqlite_search = ['my_app.search.MyAppSearch']
|
||||
```
|
||||
|
||||
### 3. Create API Endpoint
|
||||
|
||||
Create a whitelisted method to expose search functionality:
|
||||
|
||||
```python
|
||||
# my_app/api.py
|
||||
import frappe
|
||||
from my_app.search import MyAppSearch
|
||||
|
||||
@frappe.whitelist()
|
||||
def search(query, filters=None):
|
||||
search = MyAppSearch()
|
||||
result = search.search(query, filters=filters)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### 4. Build the Index
|
||||
|
||||
Build the search index programmatically or via console:
|
||||
|
||||
```python
|
||||
from my_app.search import MyAppSearch
|
||||
search = MyAppSearch()
|
||||
search.build_index()
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
### 1. Indexing Process
|
||||
|
||||
#### Full Index Building
|
||||
|
||||
When you call `build_index()`, the framework performs a complete index rebuild:
|
||||
|
||||
1. **Database Preparation**: Creates a temporary SQLite database with FTS5 tables configured according to your schema
|
||||
2. **Document Collection**: Queries all specified doctypes using the configured field mappings and filters
|
||||
3. **Document Processing**: For each document:
|
||||
- Extracts and maps fields according to `INDEXABLE_DOCTYPES` configuration
|
||||
- Cleans HTML content using BeautifulSoup to extract plain text
|
||||
- Applies custom document preparation logic if `prepare_document()` is overridden
|
||||
- Validates required fields (title, content) are present
|
||||
4. **Batch Insertion**: Inserts processed documents into the FTS5 index in batches for performance
|
||||
5. **Vocabulary Building**: Constructs a spelling correction dictionary from all indexed text
|
||||
6. **Atomic Replacement**: Replaces the existing index database with the new one atomically
|
||||
|
||||
#### Individual Document Indexing
|
||||
|
||||
For real-time updates using `index_doc()` or `remove_doc()`:
|
||||
|
||||
1. **Single Document Processing**: Retrieves and processes one document using the same field mapping logic
|
||||
2. **Incremental Update**: Updates the existing FTS5 index by inserting, updating, or deleting the specific document
|
||||
3. **Vocabulary Update**: Updates the spelling dictionary with new terms from the document
|
||||
|
||||
### 2. Search Process
|
||||
|
||||
When a user performs a search using `search()`, the framework executes these steps:
|
||||
|
||||
1. **Permission Filtering**: Calls `get_search_filters()` to determine what documents the current user can access
|
||||
2. **Query Preprocessing**:
|
||||
- Validates the search query is not empty
|
||||
- Combines user-provided filters with permission filters
|
||||
3. **Spelling Correction**:
|
||||
- Analyzes query terms against the vocabulary dictionary
|
||||
- Uses trigram similarity to suggest corrections for misspelled words
|
||||
- Expands the original query with corrected terms
|
||||
4. **FTS5 Query Execution**:
|
||||
- Constructs an FTS5-compatible query string
|
||||
- Executes the full-text search against the SQLite database
|
||||
- Applies metadata filters (status, owner, project, etc.)
|
||||
- Retrieves raw results with BM25 scores
|
||||
5. **Results Processing**:
|
||||
- **Custom Scoring**: Applies the scoring pipeline to calculate final relevance scores
|
||||
- Base BM25 score processing
|
||||
- Title matching boosts (exact and partial matches)
|
||||
- Recency boosting based on document age
|
||||
- Custom scoring functions (doctype-specific, priority-based, etc.)
|
||||
- **Ranking**: Sorts results by final scores and assigns rank positions
|
||||
- **Content Formatting**: Generates content snippets and highlights matching terms
|
||||
|
||||
## Configuration
|
||||
|
||||
### INDEX_SCHEMA
|
||||
|
||||
Defines the structure of your search index:
|
||||
|
||||
```python
|
||||
INDEX_SCHEMA = {
|
||||
# Text fields that will be searchable (defaults to ["title", "content"])
|
||||
"text_fields": ["title", "content"],
|
||||
|
||||
# Metadata fields stored alongside text content for filtering
|
||||
"metadata_fields": ["project", "owner", "status", "priority"],
|
||||
|
||||
# FTS5 tokenizer configuration
|
||||
"tokenizer": "unicode61 remove_diacritics 2 tokenchars '-_@.'"
|
||||
}
|
||||
```
|
||||
|
||||
### INDEXABLE_DOCTYPES
|
||||
|
||||
Specifies which doctypes to index and how to map their fields:
|
||||
|
||||
```python
|
||||
INDEXABLE_DOCTYPES = {
|
||||
"Task": {
|
||||
# Field mapping
|
||||
"fields": [
|
||||
"name",
|
||||
{"title": "subject"}, # Maps subject field to title
|
||||
{"content": "description"}, # Maps description field to content
|
||||
{"modified": "creation"}, # Use creation instead of modified for recency boost
|
||||
"project",
|
||||
"owner"
|
||||
],
|
||||
|
||||
# Optional filters to limit which records are indexed
|
||||
"filters": {
|
||||
"status": ("!=", "Cancelled"),
|
||||
"docstatus": ("!=", 2)
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Field Mapping Rules
|
||||
|
||||
- **String fields**: Direct mapping `"field_name"`
|
||||
- **Aliased fields**: Dictionary mapping `{"schema_field": "doctype_field"}`
|
||||
- **Required fields**: `title` and `content` fields must be present or explicitly mapped (e.g., `{"title": "subject"}`)
|
||||
- **Auto-added fields**: `doctype` and `name` are automatically included
|
||||
- **Modified field**: Added automatically if used in any doctype configuration. Used for recency boosting - if you want to use a different timestamp field (like `creation` or `last_updated`), map it to `modified` using `{"modified": "creation"}`
|
||||
|
||||
## Features & Customization
|
||||
|
||||
### Permission Filtering
|
||||
|
||||
Implement `get_search_filters()` to control access:
|
||||
|
||||
```python
|
||||
def get_search_filters(self):
|
||||
"""Return filters based on user permissions"""
|
||||
user = frappe.session.user
|
||||
|
||||
if user == "Administrator":
|
||||
return {} # No restrictions
|
||||
|
||||
# Example: User can only see their own and public documents
|
||||
return {
|
||||
"owner": user,
|
||||
"status": ["Active", "Published"]
|
||||
}
|
||||
```
|
||||
|
||||
### Custom Scoring
|
||||
|
||||
Create custom scoring functions to influence search relevance:
|
||||
|
||||
```python
|
||||
class MyAppSearch(SQLiteSearch):
|
||||
...
|
||||
|
||||
@SQLiteSearch.scoring_function
|
||||
def _get_priority_boost(self, row, query, query_words):
|
||||
"""Boost high-priority items"""
|
||||
priority = row.get("priority", "Medium")
|
||||
|
||||
if priority == "High":
|
||||
return 1.5
|
||||
if priority == "Medium":
|
||||
return 1.1
|
||||
return 1.0
|
||||
```
|
||||
|
||||
### Recency Boosting
|
||||
|
||||
The framework automatically provides time-based recency boosting using the `modified` field:
|
||||
|
||||
```python
|
||||
# The modified field is used for calculating document age
|
||||
# Recent documents get higher scores:
|
||||
# - Last 24 hours: 1.8x boost
|
||||
# - Last 7 days: 1.5x boost
|
||||
# - Last 30 days: 1.2x boost
|
||||
# - Last 90 days: 1.1x boost
|
||||
# - Older documents: gradually decreasing boost
|
||||
|
||||
# If your doctype uses a different timestamp field, map it to modified:
|
||||
INDEXABLE_DOCTYPES = {
|
||||
"GP Discussion": {
|
||||
"fields": ["name", "title", "content", {"modified": "last_post_at"}, "project"],
|
||||
},
|
||||
"Article": {
|
||||
"fields": ["name", "title", "content", {"modified": "published_date"}, "category"],
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Document Preparation
|
||||
|
||||
Override `prepare_document()` for custom document processing:
|
||||
|
||||
```python
|
||||
def prepare_document(self, doc):
|
||||
"""Custom document preparation"""
|
||||
document = super().prepare_document(doc)
|
||||
if not document:
|
||||
return None
|
||||
|
||||
# Add computed fields
|
||||
if doc.doctype == "Task":
|
||||
# Combine multiple fields into content
|
||||
content_parts = [
|
||||
doc.description or "",
|
||||
doc.notes or "",
|
||||
"\n".join([comment.content for comment in doc.get("comments", [])])
|
||||
]
|
||||
document["content"] = "\n".join(filter(None, content_parts))
|
||||
|
||||
# set fields that might be stored in another table
|
||||
document["category"] = get_category_for_task(doc)
|
||||
|
||||
return document
|
||||
```
|
||||
|
||||
### Spelling Correction
|
||||
|
||||
The framework includes built-in spelling correction using trigram similarity:
|
||||
|
||||
```python
|
||||
# Spelling correction happens automatically
|
||||
search_result = search.search("projetc managment") # Will find "project management"
|
||||
|
||||
# Access correction information
|
||||
print(search_result["summary"]["corrected_words"])
|
||||
# Output: {"projetc": "project", "managment": "management"}
|
||||
```
|
||||
|
||||
### Content Processing
|
||||
|
||||
HTML content is automatically cleaned and processed using BeautifulSoup:
|
||||
|
||||
```python
|
||||
# Complex HTML content like this:
|
||||
html_content = """
|
||||
<div class="article">
|
||||
<h1>API Documentation</h1>
|
||||
<p>Learn how to integrate with our <a href="/api">REST API</a>.</p>
|
||||
<img src="/images/api-flow.png" alt="API workflow diagram" />
|
||||
<ul>
|
||||
<li><strong>Authentication:</strong> Use <code>Bearer tokens</code></li>
|
||||
<li>Rate limiting: <em>1000 requests/hour</em></li>
|
||||
</ul>
|
||||
<blockquote>See our <a href="/examples">code examples</a> for details.</blockquote>
|
||||
<table><tr><td>Method</td><td>POST</td></tr></table>
|
||||
<script>analytics.track('page_view');</script>
|
||||
<style>.hidden { display: none; }</style>
|
||||
</div>
|
||||
"""
|
||||
|
||||
# Is automatically converted to clean, searchable plain text:
|
||||
"""
|
||||
API Documentation
|
||||
|
||||
Learn how to integrate with our REST API.
|
||||
|
||||
Authentication: Use Bearer tokens
|
||||
Rate limiting: 1000 requests/hour
|
||||
|
||||
See our code examples for details.
|
||||
|
||||
Method POST
|
||||
"""
|
||||
|
||||
# The cleaning process:
|
||||
# 1. Removes all HTML tags (<div>, <h1>, <strong>, <code>, etc.)
|
||||
# 2. Strips out scripts, styles, and non-content elements
|
||||
# 3. Extracts link text while removing href URLs
|
||||
# 4. Normalizes whitespace and line breaks
|
||||
```
|
||||
|
||||
### Title-Only Search
|
||||
|
||||
```python
|
||||
results = search.search("project update", title_only=True)
|
||||
```
|
||||
|
||||
### Advanced Filtering
|
||||
|
||||
```python
|
||||
accessible_projects = ['PROJ001', 'PROJ002', ...]
|
||||
|
||||
filters = {
|
||||
"project": accessible_projects, # Multiple values (IN clause)
|
||||
"owner": current_user, # Single value (= clause)
|
||||
}
|
||||
|
||||
results = search.search("bug fix", filters=filters)
|
||||
```
|
||||
|
||||
### Automatic Index Handling
|
||||
|
||||
The framework handles index building and maintenance automatically when you register your search class:
|
||||
|
||||
```python
|
||||
# hooks.py
|
||||
sqlite_search = ['my_app.search.MyAppSearch']
|
||||
```
|
||||
|
||||
**What the framework does automatically:**
|
||||
|
||||
1. **Post-Migration Index Building**: Builds the search index automatically after running `bench migrate`
|
||||
2. **Periodic Index Verification**: Checks every 15 minutes that the index exists and rebuilds if missing
|
||||
3. **Real-time Document Updates**: Automatically calls `index_doc()` and `remove_doc()` on document lifecycle events (insert, update, delete) for all doctypes defined in your `INDEXABLE_DOCTYPES`
|
||||
|
||||
## Manual Index Handling
|
||||
|
||||
If you prefer to have manual control over the lifecycle of indexing, you can opt out of automatic index handling by not registering your search class in the `sqlite_search` hook.
|
||||
|
||||
```python
|
||||
from my_app.search import MyAppSearch
|
||||
|
||||
def build_index_in_background():
|
||||
"""Manually trigger background index building"""
|
||||
search = MyAppSearch()
|
||||
if search.is_search_enabled() and not search.index_exists():
|
||||
frappe.enqueue("my_app.search.build_index", queue="long")
|
||||
|
||||
# hooks.py
|
||||
scheduler_events = {
|
||||
# Custom scheduler (if you want different timing)
|
||||
"daily": ["my_app.search.build_index_in_background"],
|
||||
}
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
#### `search(query, title_only=False, filters=None)`
|
||||
Main search method that returns formatted results.
|
||||
|
||||
**Parameters:**
|
||||
- `query` (str): Search query text
|
||||
- `title_only` (bool): Search only in title fields
|
||||
- `filters` (dict): Additional filters to apply
|
||||
|
||||
**Returns:**
|
||||
```python
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"doctype": "Task",
|
||||
"name": "TASK-001",
|
||||
"title": "Fix login bug",
|
||||
"content": "User cannot login after password reset...",
|
||||
"score": 0.85,
|
||||
"original_rank": 3, # original bm25 rank
|
||||
"rank": 1, # modified rank after custom scoring pipeline
|
||||
# ... other metadata fields
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"duration": 0.023,
|
||||
"total_matches": 15,
|
||||
"returned_matches": 15,
|
||||
"corrected_words": {"loggin": "login"},
|
||||
"corrected_query": "Fix login bug",
|
||||
"title_only": False,
|
||||
"filtered_matches": 15,
|
||||
"applied_filters": {"status": ["Open"]}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `build_index()`
|
||||
Build the complete search index from scratch.
|
||||
|
||||
#### `index_doc(doctype, docname)`
|
||||
Index a single document.
|
||||
|
||||
#### `remove_doc(doctype, docname)`
|
||||
Remove a single document from the index.
|
||||
|
||||
#### `is_search_enabled()`
|
||||
Check if search is enabled (override to add disable logic).
|
||||
|
||||
#### `index_exists()`
|
||||
Check if the search index exists.
|
||||
|
||||
#### `get_search_filters()`
|
||||
**Must be implemented by subclasses.** Return filters for the current user.
|
||||
|
||||
**Returns:**
|
||||
```python
|
||||
{
|
||||
"field_name": "value", # Single value
|
||||
"field_name": ["val1", "val2"], # Multiple values
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### `scoring_function()`
|
||||
|
||||
Use the `@SQLiteSearch.scoring_function` decorator to mark a function as a scoring function.
|
||||
1419
frappe/search/sqlite_search.py
Normal file
1419
frappe/search/sqlite_search.py
Normal file
File diff suppressed because it is too large
Load diff
505
frappe/tests/test_sqlite_search.py
Normal file
505
frappe/tests/test_sqlite_search.py
Normal file
|
|
@ -0,0 +1,505 @@
|
|||
import os
|
||||
import sqlite3
|
||||
import time
|
||||
from typing import ClassVar
|
||||
from unittest.mock import patch
|
||||
|
||||
import frappe
|
||||
from frappe.search.sqlite_search import SQLiteSearch, SQLiteSearchIndexMissingError
|
||||
from frappe.tests import IntegrationTestCase
|
||||
|
||||
|
||||
class TestSQLiteSearch(SQLiteSearch):
    """Concrete SQLiteSearch subclass used as the fixture for the test suite."""

    # Database file name for the throwaway test index.
    INDEX_NAME = "test_search.db"

    # Search schema: two searchable text columns plus filterable metadata fields.
    INDEX_SCHEMA: ClassVar = {
        "text_fields": ["title", "content"],
        "metadata_fields": ["doctype", "name", "owner", "modified"],
        "tokenizer": "unicode61 remove_diacritics 2",
    }

    # Doctypes indexed by this fixture, with field mappings and optional filters.
    INDEXABLE_DOCTYPES: ClassVar = {
        "Note": {
            "fields": ["name", "title", "content", "owner", {"modified": "creation"}],
        },
        "ToDo": {
            "fields": ["name", {"title": "description"}, {"content": "description"}, "owner", "modified"],
        },
        "User": {
            "fields": ["name", {"title": "full_name"}, {"content": "email"}, "name", "modified"],
            "filters": {"enabled": 1},
        },
    }

    def get_search_filters(self):
        """Permission filters: Administrator sees everything, other users only their own docs."""
        user = frappe.session.user
        if user != "Administrator":
            # Simulate user-specific filtering
            return {"owner": user}
        return {}
|
||||
|
||||
|
||||
class TestSQLiteSearchAPI(IntegrationTestCase):
|
||||
"""Test suite for SQLiteSearch public API functionality."""
|
||||
|
||||
@classmethod
def setUpClass(cls):
    """Create the shared search fixture and start from a clean index state."""
    super().setUpClass()
    cls.search = TestSQLiteSearch()
    # Drop any database file left behind by a previous run.
    cls.search.drop_index()
|
||||
|
||||
@classmethod
def tearDownClass(cls):
    """Remove the test database once the whole suite has finished."""
    super().tearDownClass()
    # Leave no index file on disk after the suite.
    cls.search.drop_index()
|
||||
|
||||
def setUp(self):
    """Insert a handful of Note and ToDo fixtures before each test."""
    super().setUp()
    self.test_notes = []
    self.test_todos = []

    # Notes covering distinct topics so searches can discriminate between them.
    note_data = [
        ("Python Programming Guide", "Learn Python basics and advanced concepts"),
        ("Project Management Tips", "How to manage software projects effectively"),
        ("Cooking Recipe Collection", "Delicious recipes for home cooking"),
        ("Machine Learning Tutorial", "Introduction to ML algorithms and Python implementation"),
    ]
    for title, content in note_data:
        doc = frappe.get_doc({"doctype": "Note", "title": title, "content": content})
        doc.insert()
        self.test_notes.append(doc)

    # A few open todos; their description doubles as both title and content in the index.
    for description in (
        "Review Python code for search functionality",
        "Update project documentation",
        "Plan team meeting agenda",
    ):
        doc = frappe.get_doc({"doctype": "ToDo", "description": description, "status": "Open"})
        doc.insert()
        self.test_todos.append(doc)
|
||||
|
||||
def tearDown(self):
    """Best-effort deletion of the fixtures created in setUp."""
    # Notes first, then todos — mirrors creation order.
    for doc in self.test_notes + self.test_todos:
        try:
            doc.delete()
        except Exception:
            # Deletion failures are ignored: the doc may already be gone.
            pass
    super().tearDown()
|
||||
|
||||
def test_index_lifecycle_and_status_methods(self):
    """Test index building, existence checking, and status validation.

    Covers: missing-index error, build, existence checks, expected SQLite
    tables, and idempotent drop.
    """
    # Start from a clean slate: no index on disk.
    self.search.drop_index()
    self.assertFalse(self.search.index_exists())

    # Searching without an index must raise the dedicated error.
    with self.assertRaises(SQLiteSearchIndexMissingError):
        self.search.raise_if_not_indexed()

    # Build index
    self.search.build_index()
    self.assertTrue(self.search.index_exists())

    # With the index in place the guard must pass silently.
    try:
        self.search.raise_if_not_indexed()
    except SQLiteSearchIndexMissingError:
        self.fail("raise_if_not_indexed() raised exception when index exists")

    # Verify database file exists and has correct tables.
    self.assertTrue(os.path.exists(self.search.db_path))

    conn = sqlite3.connect(self.search.db_path)
    # Fix: close the connection even when an assertion below fails, otherwise
    # the file handle leaks and drop_index may misbehave on some platforms.
    try:
        cursor = conn.cursor()
        for query in (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='search_fts'",
            "SELECT name FROM sqlite_master WHERE type='table' AND name='search_vocabulary'",
            "SELECT name FROM sqlite_master WHERE type='table' AND name='search_trigrams'",
        ):
            cursor.execute(query)
            self.assertTrue(cursor.fetchone())
    finally:
        conn.close()

    # drop_index removes both the logical index and the file on disk.
    self.search.drop_index()
    self.assertFalse(self.search.index_exists())
    self.assertFalse(os.path.exists(self.search.db_path))

    # Dropping a non-existent index must be a no-op, not an error.
    self.search.drop_index()
|
||||
|
||||
def test_basic_search_functionality(self):
    """Test core search functionality with various query types."""
    # Build index first
    self.search.build_index()

    # Plain text search should hit at least one document mentioning Python.
    results = self.search.search("Python")
    self.assertGreater(len(results["results"]), 0)
    top = results["results"][0]
    self.assertIn("Python", top["title"] + top["content"])

    # Every hit must carry the full result schema.
    for field in (
        "id",
        "title",
        "content",
        "doctype",
        "name",
        "score",
        "original_rank",
        "modified_rank",
    ):
        self.assertIn(field, top)

    # Search must be case-insensitive.
    results_lower = self.search.search("python")
    results_upper = self.search.search("PYTHON")
    self.assertEqual(len(results_lower["results"]), len(results_upper["results"]))

    # Prefix matching: "prog" should match "Programming".
    self.assertGreater(len(self.search.search("prog")["results"]), 0)

    # Multi-word queries still return hits.
    self.assertGreater(len(self.search.search("Python programming")["results"]), 0)

    # An empty query yields no results.
    self.assertEqual(len(self.search.search("")["results"]), 0)

    # title_only restricts matches to the title field.
    title_results = self.search.search("Python", title_only=True)
    self.assertGreater(len(title_results["results"]), 0)
    for result in title_results["results"]:
        self.assertIn("Python", result["title"])
|
||||
|
||||
def test_search_filtering_and_permissions(self):
    """Test search filtering and permission-based result filtering."""
    self.search.build_index()

    # Single-value doctype filter (= clause).
    for result in self.search.search("", filters={"doctype": "Note"})["results"]:
        self.assertEqual(result["doctype"], "Note")

    # List-valued filter behaves like an IN clause.
    for result in self.search.search("", filters={"doctype": ["Note", "ToDo"]})["results"]:
        self.assertIn(result["doctype"], ["Note", "ToDo"])

    # An empty filter list excludes everything.
    self.assertEqual(len(self.search.search("", filters={"doctype": []})["results"]), 0)

    # Permission filters kick in for non-Administrator users.
    original_user = frappe.session.user
    try:
        test_user_email = "test_search_user@example.com"
        if not frappe.db.exists("User", test_user_email):
            frappe.get_doc(
                {
                    "doctype": "User",
                    "email": test_user_email,
                    "first_name": "Test",
                    "last_name": "User",
                    "enabled": 1,
                }
            ).insert()

        frappe.set_user(test_user_email)

        # With owner-based filtering active, the search must still succeed
        # and return a (possibly reduced) list.
        results = self.search.search("Python")
        self.assertIsInstance(results["results"], list)
    finally:
        frappe.set_user(original_user)
|
||||
|
||||
def test_advanced_scoring_and_ranking(self):
    """Test scoring pipeline, ranking, and result ordering."""
    self.search.build_index()

    # A term present in multiple documents exercises the ranking pipeline.
    hits = self.search.search("Python")["results"]

    # Scores must come back in descending order.
    scores = [hit["score"] for hit in hits]
    self.assertEqual(scores, sorted(scores, reverse=True))

    # modified_rank is the post-pipeline position; original_rank the raw BM25 one.
    for position, hit in enumerate(hits, start=1):
        self.assertEqual(hit["modified_rank"], position)
        self.assertIsInstance(hit["original_rank"], int)
        self.assertGreater(hit["original_rank"], 0)

    # A hit whose title contains the term should carry a boosted score.
    hits = self.search.search("Programming")["results"]
    title_match_found = False
    for hit in hits:
        if "Programming" in hit["title"]:
            title_match_found = True
            self.assertGreater(hit["score"], 1.0)
            break
    self.assertTrue(title_match_found, "No title matches found for scoring test")

    # Raw BM25 scores are exposed alongside the final score.
    for hit in hits:
        self.assertIn("bm25_score", hit)
        self.assertIsInstance(hit["bm25_score"], (int, float))
|
||||
|
||||
def test_spelling_correction_and_query_expansion(self):
    """Test spelling correction and query expansion functionality."""
    self.search.build_index()

    # "Pythom" misspells "Python", which is present in the indexed vocabulary.
    results = self.search.search("Pythom")

    summary = results["summary"]
    if summary.get("corrected_words"):
        # Corrections are reported as word -> replacement plus a rewritten query.
        self.assertIsInstance(summary["corrected_words"], dict)
        self.assertIsInstance(summary["corrected_query"], str)

    # Correction is best-effort (depends on vocabulary), so only assert that
    # the call succeeded structurally.
    self.assertIsInstance(results["results"], list)

    # A nonsense token should match (at most) almost nothing.
    results = self.search.search("xyzabc123nonexistent")
    self.assertLessEqual(len(results["results"]), 1)
|
||||
|
||||
def test_document_indexing_operations(self):
    """Test individual document indexing and removal operations."""
    self.search.build_index()

    # This note is created only after the full build, so the index is unaware of it.
    new_note = frappe.get_doc(
        {
            "doctype": "Note",
            "title": "Newly Added Document",
            "content": "This document was added after initial indexing",
        }
    )
    new_note.insert()

    try:
        initial_count = len(self.search.search("Newly Added Document")["results"])

        # Incrementally index the document and verify it becomes findable.
        self.search.index_doc("Note", new_note.name)
        results = self.search.search("Newly Added Document")
        self.assertGreater(len(results["results"]), initial_count)
        found = any(hit["name"] == new_note.name for hit in results["results"])
        self.assertTrue(found, "Newly indexed document not found in search results")

        # Removing it from the index makes it unfindable again.
        self.search.remove_doc("Note", new_note.name)
        results = self.search.search("Newly Added Document")
        found = any(hit["name"] == new_note.name for hit in results["results"])
        self.assertFalse(found, "Removed document still found in search results")
    finally:
        new_note.delete()
|
||||
|
||||
def test_search_result_summary_and_metadata(self):
|
||||
"""Test search result summary and metadata information."""
|
||||
self.search.build_index()
|
||||
|
||||
results = self.search.search("Python")
|
||||
summary = results["summary"]
|
||||
|
||||
# Verify summary structure
|
||||
required_summary_fields = [
|
||||
"total_matches",
|
||||
"filtered_matches",
|
||||
"returned_matches",
|
||||
"duration",
|
||||
"title_only",
|
||||
"applied_filters",
|
||||
]
|
||||
for field in required_summary_fields:
|
||||
self.assertIn(field, summary)
|
||||
|
||||
# Verify summary values make sense
|
||||
self.assertIsInstance(summary["duration"], (int, float))
|
||||
self.assertGreater(summary["duration"], 0)
|
||||
self.assertEqual(summary["total_matches"], summary["filtered_matches"])
|
||||
self.assertEqual(summary["filtered_matches"], len(results["results"]))
|
||||
self.assertFalse(summary["title_only"])
|
||||
self.assertEqual(summary["applied_filters"], {})
|
||||
|
||||
# Test with filters applied
|
||||
results = self.search.search("Python", filters={"doctype": "Note"})
|
||||
summary = results["summary"]
|
||||
self.assertEqual(summary["applied_filters"], {"doctype": "Note"})
|
||||
|
||||
# Test title-only search
|
||||
results = self.search.search("Python", title_only=True)
|
||||
summary = results["summary"]
|
||||
self.assertTrue(summary["title_only"])
|
||||
|
||||
def test_configuration_and_schema_validation(self):
|
||||
"""Test configuration validation and schema handling."""
|
||||
|
||||
# Test invalid configuration
|
||||
class InvalidSearchClass(SQLiteSearch):
|
||||
# Missing required INDEX_SCHEMA
|
||||
INDEXABLE_DOCTYPES: ClassVar = {"Note": {"fields": ["name", "title"]}}
|
||||
|
||||
def get_search_filters(self):
|
||||
return {}
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
InvalidSearchClass()
|
||||
|
||||
# Test invalid doctype configuration
|
||||
class InvalidDoctypeConfig(SQLiteSearch):
|
||||
INDEX_SCHEMA: ClassVar = {"text_fields": ["title", "content"]}
|
||||
INDEXABLE_DOCTYPES: ClassVar = {
|
||||
"Note": {
|
||||
# Missing 'fields' key
|
||||
"title_field": "title"
|
||||
}
|
||||
}
|
||||
|
||||
def get_search_filters(self):
|
||||
return {}
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
InvalidDoctypeConfig()
|
||||
|
||||
def test_content_processing_and_html_handling(self):
|
||||
"""Test content processing including HTML tag removal and text normalization."""
|
||||
self.search.build_index()
|
||||
|
||||
# Create a note with HTML content
|
||||
html_note = frappe.get_doc(
|
||||
{
|
||||
"doctype": "Note",
|
||||
"title": "HTML Content Test",
|
||||
"content": "<p>This is <strong>bold</strong> text with <a href='http://example.com'>links</a> and <br> line breaks.</p>",
|
||||
}
|
||||
)
|
||||
html_note.insert()
|
||||
|
||||
try:
|
||||
# Index the document
|
||||
self.search.index_doc("Note", html_note.name)
|
||||
|
||||
# Search should find processed content
|
||||
results = self.search.search("bold text links")
|
||||
|
||||
# Should find the document
|
||||
found = False
|
||||
for result in results["results"]:
|
||||
if result["name"] == html_note.name:
|
||||
found = True
|
||||
# Content should be processed (HTML tags removed)
|
||||
self.assertNotIn("<p>", result["content"])
|
||||
self.assertNotIn("<strong>", result["content"])
|
||||
self.assertIn("bold", result["content"])
|
||||
self.assertNotIn(
|
||||
"<a href='http://example.com'>", result["content"]
|
||||
) # Links should be replaced
|
||||
break
|
||||
|
||||
self.assertTrue(found, "HTML content document not found in search")
|
||||
|
||||
finally:
|
||||
html_note.delete()
|
||||
|
||||
def test_search_disabled_state(self):
|
||||
"""Test behavior when search is disabled."""
|
||||
|
||||
# Create a search class with search disabled
|
||||
class DisabledSearch(TestSQLiteSearch):
|
||||
def is_search_enabled(self):
|
||||
return False
|
||||
|
||||
disabled_search = DisabledSearch()
|
||||
disabled_search.drop_index() # Ensure clean state
|
||||
|
||||
# Should return empty results when disabled
|
||||
results = disabled_search.search("Python")
|
||||
self.assertEqual(len(results["results"]), 0)
|
||||
|
||||
# Build index should do nothing when disabled
|
||||
disabled_search.build_index() # Should not raise error but do nothing
|
||||
self.assertFalse(disabled_search.index_exists())
|
||||
|
||||
@patch("frappe.enqueue")
|
||||
def test_background_operations(self, mock_enqueue):
|
||||
"""Test background job integration and module-level functions."""
|
||||
from frappe.search.sqlite_search import (
|
||||
build_index_in_background,
|
||||
get_search_classes,
|
||||
)
|
||||
|
||||
# Test getting search classes
|
||||
with patch("frappe.get_hooks") as mock_get_hooks:
|
||||
mock_get_hooks.return_value = ["frappe.tests.test_sqlite_search.TestSQLiteSearch"]
|
||||
classes = get_search_classes()
|
||||
self.assertEqual(len(classes), 1)
|
||||
self.assertEqual(classes[0], TestSQLiteSearch)
|
||||
|
||||
# Test background index building
|
||||
with patch("frappe.get_hooks") as mock_get_hooks:
|
||||
mock_get_hooks.return_value = ["frappe.tests.test_sqlite_search.TestSQLiteSearch"]
|
||||
build_index_in_background()
|
||||
|
||||
# Should have enqueued a background job
|
||||
self.assertTrue(mock_enqueue.called)