Skip to content

Commit 9af9d8f

Browse files
feat: Improve meilisearch configuration step (#38384)
* feat: Adds a way to find the differences in Meilisearch state and come up with a migration plan and a configuration plan depending on that state. This introduces a drift-detection mechanism (a "drift engine") that drills down into the Meilisearch configuration and figures out what has changed: - settings - primary key. Depending on the change, we follow a strategy that decides whether to migrate the data or recreate the index. * feat: Add the command to schedule the Celery task for populating the index.
1 parent fd89102 commit 9af9d8f

9 files changed

Lines changed: 1102 additions & 104 deletions

File tree

openedx/core/djangoapps/content/search/api.py

Lines changed: 218 additions & 82 deletions
Large diffs are not rendered by default.

openedx/core/djangoapps/content/search/apps.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@ class ContentSearchConfig(AppConfig):
1010

1111
default_auto_field = "django.db.models.BigAutoField"
1212
name = "openedx.core.djangoapps.content.search"
13+
label = "search"
1314

1415
def ready(self):
1516
# Connect signal handlers
17+
# Connect post_migrate for Meilisearch index reconciliation.
18+
# No sender= argument here; the handler filters by sender.label internally.
19+
from django.db.models.signals import post_migrate # pylint: disable=import-outside-toplevel
20+
1621
from . import handlers # pylint: disable=unused-import # noqa: F401
22+
post_migrate.connect(handlers.handle_post_migrate)

openedx/core/djangoapps/content/search/handlers.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@
4747
from xmodule.modulestore.django import SignalHandler
4848

4949
from .api import (
50+
is_meilisearch_enabled,
5051
only_if_meilisearch_enabled,
52+
reconcile_index,
5153
upsert_content_object_tags_index_doc,
5254
upsert_item_collections_index_docs,
5355
upsert_item_containers_index_docs,
@@ -68,6 +70,37 @@
6870
log = logging.getLogger(__name__)
6971

7072

73+
def handle_post_migrate(sender, **kwargs):
74+
"""
75+
Reconcile Meilisearch index state after Django migrations run.
76+
77+
Filters on sender.label to only execute for the search app's post_migrate signal.
78+
Tolerant of Meilisearch unavailability — logs a warning and continues.
79+
"""
80+
from .apps import ContentSearchConfig # pylint: disable=import-outside-toplevel
81+
82+
if sender.label != ContentSearchConfig.label:
83+
return
84+
85+
if not is_meilisearch_enabled():
86+
return
87+
88+
try:
89+
reconcile_index(status_cb=log.info, warn_cb=log.warning)
90+
except ConnectionError as exc:
91+
log.warning(
92+
"Meilisearch reconciliation skipped during post_migrate: %s. "
93+
"Will retry on next migrate run.",
94+
exc,
95+
)
96+
except Exception as exc: # pylint: disable=broad-except
97+
log.warning(
98+
"Meilisearch reconciliation failed during post_migrate: %s. "
99+
"Will retry on next migrate run.",
100+
exc,
101+
)
102+
103+
71104
# Using post_delete here because there is no COURSE_DELETED event defined.
72105
@receiver(post_delete, sender=CourseOverview)
73106
def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument

openedx/core/djangoapps/content/search/index_config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
"""Configuration for the search index."""
22
from .documents import Fields
33

4+
# The Meilisearch primary key for all documents in the index.
5+
INDEX_PRIMARY_KEY = Fields.id
6+
47
INDEX_DISTINCT_ATTRIBUTE = "usage_key"
58

69
# Mark which attributes can be used for filtering/faceted search:
Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,100 @@
11
"""
2-
Command to build or re-build the search index for courses (in Studio, i.e. Draft
3-
mode), in Meilisearch.
2+
Command to queue incremental population of the Studio Meilisearch search index.
3+
4+
Index creation, configuration, and schema reconciliation are handled
5+
automatically via the post_migrate signal. This command is solely
6+
responsible for enqueuing the population task in Celery.
47
58
See also cms/djangoapps/contentstore/management/commands/reindex_course.py which
69
indexes LMS (published) courses in ElasticSearch.
710
"""
11+
12+
import logging
13+
14+
from django.conf import settings
815
from django.core.management import BaseCommand, CommandError
916

1017
from ... import api
18+
from ...tasks import rebuild_index_incremental
19+
20+
log = logging.getLogger(__name__)
1121

1222

1323
class Command(BaseCommand):
1424
"""
15-
Build or re-build the Meilisearch search index for courses and libraries in Studio.
25+
Add all course and library content to the Studio search index.
26+
27+
This enqueues a Celery task that incrementally indexes all courses and
28+
libraries. Progress is tracked via IncrementalIndexCompleted, so the task
29+
can safely resume if interrupted.
30+
31+
Index creation and configuration are handled by post_migrate reconciliation
32+
(runs automatically on ./manage.py cms migrate).
1633
17-
This is separate from LMS search features like courseware search or forum search.
34+
If it's ever necessary to reset the incremental indexing state (force
35+
the full re-index process to start from the beginning), use:
36+
37+
./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()'
38+
39+
This will delete all the IncrementalIndexCompleted records and will help in restarting the index population.
1840
"""
1941

20-
# TODO: improve this - see https://github.com/openedx/edx-platform/issues/36868
42+
help = "Add all course and library content to the Studio search index."
2143

2244
def add_arguments(self, parser):
23-
parser.add_argument("--experimental", action="store_true") # kept for compatibility but ignored.
24-
parser.add_argument("--reset", action="store_true")
25-
parser.add_argument("--init", action="store_true")
26-
parser.add_argument("--incremental", action="store_true")
27-
parser.set_defaults(experimental=False, reset=False, init=False, incremental=False)
45+
# Removed flags — provide clear error messages for operators with old automation.
46+
parser.add_argument(
47+
"--reset",
48+
action="store_true",
49+
default=False,
50+
help="(Removed) Index reset is now handled by post_migrate reconciliation.",
51+
)
52+
parser.add_argument(
53+
"--init",
54+
action="store_true",
55+
default=False,
56+
help="(Removed) Index initialization is now handled by post_migrate reconciliation.",
57+
)
58+
parser.add_argument(
59+
"--incremental",
60+
action="store_true",
61+
default=False,
62+
help="(Removed) Incremental is now the default and only population mode.",
63+
)
2864

2965
def handle(self, *args, **options):
30-
"""
31-
Build a new search index for Studio, containing content from courses and libraries
32-
"""
3366
if not api.is_meilisearch_enabled():
3467
raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.")
3568

3669
if options["reset"]:
37-
api.reset_index(self.stdout.write)
38-
elif options["init"]:
39-
api.init_index(self.stdout.write, self.stderr.write)
40-
elif options["incremental"]:
41-
api.rebuild_index(self.stdout.write, incremental=True)
70+
raise CommandError(
71+
"The --reset flag has been removed. "
72+
"Index reset is now handled automatically by post_migrate reconciliation. "
73+
"Run: ./manage.py cms migrate"
74+
)
75+
76+
if options["init"]:
77+
raise CommandError(
78+
"The --init flag has been removed. "
79+
"Index initialization is now handled automatically by post_migrate reconciliation. "
80+
"Run: ./manage.py cms migrate"
81+
)
82+
83+
if options["incremental"]:
84+
log.warning(
85+
"The --incremental flag has been removed. "
86+
"Incremental population is now the default behavior of this command."
87+
)
88+
89+
result = rebuild_index_incremental.delay()
90+
91+
if settings.CELERY_ALWAYS_EAGER:
92+
self.stdout.write("Indexing complete!")
4293
else:
43-
api.rebuild_index(self.stdout.write)
94+
self.stdout.write(
95+
f"Studio search index population has been queued (task_id={result.id}). "
96+
"Population will run incrementally in a Celery worker. "
97+
"Monitor progress in Celery worker logs. "
98+
"In order to reset the incremental indexing state, please run: "
99+
"./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()'"
100+
)

openedx/core/djangoapps/content/search/tasks.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,36 @@ def delete_course_index_docs(course_key_str: str) -> None:
184184

185185
# Delete children index data for course blocks.
186186
api.delete_docs_with_context_key(course_key)
187+
188+
189+
@shared_task(
190+
base=LoggedTask,
191+
autoretry_for=(MeilisearchError, ConnectionError),
192+
max_retries=3,
193+
retry_backoff=True,
194+
)
195+
@set_code_owner_attribute
196+
def rebuild_index_incremental() -> None:
197+
"""
198+
Celery task to incrementally populate the Studio Meilisearch index.
199+
200+
Uses IncrementalIndexCompleted to track progress and resume from where
201+
it left off if interrupted. Safe to call multiple times — already-indexed
202+
contexts are skipped.
203+
204+
If a rebuild is already in progress (lock held), the task exits gracefully.
205+
"""
206+
log.info("Starting incremental Studio search index population...")
207+
208+
try:
209+
api.rebuild_index(status_cb=log.info, incremental=True)
210+
except RuntimeError as exc:
211+
# rebuild_index -> _using_temp_index or lock contention
212+
if "already in progress" in str(exc).lower():
213+
log.warning(
214+
"Studio index population skipped: a rebuild is already in progress. Will retry later if re-enqueued."
215+
)
216+
return
217+
raise
218+
219+
log.info("Incremental Studio search index population complete.")

openedx/core/djangoapps/content/search/tests/test_api.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -449,19 +449,32 @@ def test_reset_meilisearch_index(self, mock_meilisearch) -> None:
449449

450450
@override_settings(MEILISEARCH_ENABLED=True)
451451
def test_init_meilisearch_index(self, mock_meilisearch) -> None:
452-
# Test index already exists
452+
# Test index already exists, is populated, and correctly configured
453+
mock_index = Mock()
454+
mock_index.primary_key = "id"
455+
mock_index.get_stats.return_value = Mock(number_of_documents=100)
456+
mock_index.get_settings.return_value = {
457+
"distinctAttribute": "usage_key",
458+
"filterableAttributes": list(api.INDEX_FILTERABLE_ATTRIBUTES),
459+
"searchableAttributes": list(api.INDEX_SEARCHABLE_ATTRIBUTES),
460+
"sortableAttributes": list(api.INDEX_SORTABLE_ATTRIBUTES),
461+
"rankingRules": list(api.INDEX_RANKING_RULES),
462+
}
463+
mock_meilisearch.return_value.get_index.return_value = mock_index
464+
453465
api.init_index()
454466
mock_meilisearch.return_value.swap_indexes.assert_not_called()
455467
mock_meilisearch.return_value.create_index.assert_not_called()
456468
mock_meilisearch.return_value.delete_index.assert_not_called()
457469

458-
# Test index already exists and has no documents
459-
mock_meilisearch.return_value.get_stats.return_value = 0
470+
# Test index already exists and is empty but correctly configured
471+
mock_index.get_stats.return_value = Mock(number_of_documents=0)
460472
api.init_index()
461473
mock_meilisearch.return_value.swap_indexes.assert_not_called()
462474
mock_meilisearch.return_value.create_index.assert_not_called()
463475
mock_meilisearch.return_value.delete_index.assert_not_called()
464476

477+
# Test index does not exist — should create it
465478
mock_meilisearch.return_value.get_index.side_effect = [
466479
MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),
467480
MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),

0 commit comments

Comments
 (0)