From 39d348e8d21f3b9930ff81375ee25a209d8b5c4e Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Wed, 22 Apr 2026 18:44:18 +0800 Subject: [PATCH 1/9] migrate document change status --- api/apps/document_app.py | 70 ----------- api/apps/restful_apis/document_api.py | 115 ++++++++++++++++++ test/testcases/test_web_api/test_common.py | 12 +- .../test_document_metadata.py | 11 +- web/src/hooks/use-document-request.ts | 7 +- .../pages/dataset/dataset/dataset-table.tsx | 3 + .../dataset/use-bulk-operate-dataset.tsx | 10 +- .../dataset/use-dataset-table-columns.tsx | 4 +- web/src/services/knowledge-service.ts | 17 ++- web/src/utils/api.ts | 3 +- 10 files changed, 165 insertions(+), 87 deletions(-) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index f509ccdb243..11815ea1fc5 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -249,76 +249,6 @@ def thumbnails(): return server_error_response(e) -@manager.route("/change_status", methods=["POST"]) # noqa: F821 -@login_required -@validate_request("doc_ids", "status") -async def change_status(): - req = await get_request_json() - doc_ids = req.get("doc_ids", []) - status = str(req.get("status", "")) - - if status not in ["0", "1"]: - return get_json_result(data=False, message='"Status" must be either 0 or 1!', code=RetCode.ARGUMENT_ERROR) - - result = {} - has_error = False - for doc_id in doc_ids: - if not DocumentService.accessible(doc_id, current_user.id): - result[doc_id] = {"error": "No authorization."} - has_error = True - continue - - try: - e, doc = DocumentService.get_by_id(doc_id) - if not e: - result[doc_id] = {"error": "No authorization."} - has_error = True - continue - e, kb = KnowledgebaseService.get_by_id(doc.kb_id) - if not e: - result[doc_id] = {"error": "Can't find this dataset!"} - has_error = True - continue - current_status = str(doc.status) - if current_status == status: - result[doc_id] = {"status": status} - continue - if not DocumentService.update_by_id(doc_id, 
{"status": str(status)}): - result[doc_id] = {"error": "Database error (Document update)!"} - has_error = True - continue - - status_int = int(status) - if getattr(doc, "chunk_num", 0) > 0: - try: - ok = settings.docStoreConn.update( - {"doc_id": doc_id}, - {"available_int": status_int}, - search.index_name(kb.tenant_id), - doc.kb_id, - ) - except Exception as exc: - msg = str(exc) - if "3022" in msg: - result[doc_id] = {"error": "Document store table missing."} - else: - result[doc_id] = {"error": f"Document store update failed: {msg}"} - has_error = True - continue - if not ok: - result[doc_id] = {"error": "Database error (docStore update)!"} - has_error = True - continue - result[doc_id] = {"status": status} - except Exception as e: - result[doc_id] = {"error": f"Internal server error: {str(e)}"} - has_error = True - - if has_error: - return get_json_result(data=result, message="Partial failure", code=RetCode.SERVER_ERROR) - return get_json_result(data=result) - - @manager.route("/run", methods=["POST"]) # noqa: F821 @login_required @validate_request("doc_ids", "run") diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 9e422d0fdf2..051416b4bb1 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id): except Exception as e: logging.exception(e) return get_error_data_result(message="Internal server error") + + def _aggregate_filters(docs): """Aggregate filter options from a list of documents. @@ -815,3 +817,116 @@ def _aggregate_filters(docs): "run_status": run_status_counter, "metadata": metadata_counter, } + + +@manager.route("/datasets/<dataset_id>/documents/batch-update-status", methods=["POST"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def batch_update_document_status(tenant_id, dataset_id): + """ + Batch update status of documents within a dataset.
+ --- + tags: + - Documents + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: dataset_id + type: string + required: true + description: ID of the dataset. + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + - in: body + name: body + description: Document status update parameters. + required: true + schema: + type: object + required: + - doc_ids + - status + properties: + doc_ids: + type: array + items: + type: string + description: List of document IDs to update. + status: + type: string + enum: ["0", "1"] + description: New status (0 = disabled, 1 = enabled). + responses: + 200: + description: Document statuses updated successfully. + """ + from common import settings + from rag.nlp import search + + req = await get_request_json() + doc_ids = req.get("doc_ids", []) + status = str(req.get("status", -1)) + + if status not in ["0", "1"]: + return get_error_argument_result(message=f'"Status" must be either 0 or 1:{status}!') + + # Verify dataset ownership + if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id): + return get_error_data_result(message="You don't own the dataset.") + + e, kb = KnowledgebaseService.get_by_id(dataset_id) + if not e: + return get_error_data_result(message="Can't find this dataset!") + + result = {} + has_error = False + for doc_id in doc_ids: + try: + e, doc = DocumentService.get_by_id(doc_id) + if not e: + result[doc_id] = {"error": "Document not found"} + has_error = True + continue + + current_status = str(doc.status) + if current_status == status: + result[doc_id] = {"status": status} + continue + if not DocumentService.update_by_id(doc_id, {"status": str(status)}): + result[doc_id] = {"error": "Database error (Document update)!"} + has_error = True + continue + + status_int = int(status) + if getattr(doc, "chunk_num", 0) > 0: + try: + ok = settings.docStoreConn.update( + {"doc_id": doc_id}, + {"available_int": status_int}, + 
search.index_name(kb.tenant_id), + doc.kb_id, + ) + except Exception as exc: + msg = str(exc) + if "3022" in msg: + result[doc_id] = {"error": "Document store table missing."} + else: + result[doc_id] = {"error": f"Document store update failed: {msg}"} + has_error = True + continue + if not ok: + result[doc_id] = {"error": "Database error (docStore update)!"} + has_error = True + continue + result[doc_id] = {"status": status} + except Exception as e: + result[doc_id] = {"error": f"Internal server error: {str(e)}"} + has_error = True + + if has_error: + return get_json_result(data=result, message="Partial failure", code=RetCode.SERVER_ERROR) + return get_json_result(data=result) diff --git a/test/testcases/test_web_api/test_common.py b/test/testcases/test_web_api/test_common.py index 877de3a3767..031d9ce0c6b 100644 --- a/test/testcases/test_web_api/test_common.py +++ b/test/testcases/test_web_api/test_common.py @@ -419,8 +419,16 @@ def document_update_metadata_setting(auth, payload=None, *, headers=HEADERS, dat return res.json() -def document_change_status(auth, payload=None, *, headers=HEADERS, data=None): - res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/change_status", headers=headers, auth=auth, json=payload, data=data) +def document_change_status(auth, dataset_id, payload=None, *, headers=HEADERS, data=None): + """ + Batch update document status within a dataset. 
+ + Args: + auth: Authentication credentials + dataset_id: ID of the dataset + payload: Request body containing doc_ids and status + """ + res = requests.post(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents/batch-update-status", headers=headers, auth=auth, json=payload, data=data) return res.json() diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 8dacada2d1f..f8ccc57206e 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -75,8 +75,9 @@ def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, @pytest.mark.p2 @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES) - def test_change_status_auth_invalid(self, invalid_auth, expected_code, expected_fragment): - res = document_change_status(invalid_auth, {"doc_ids": ["doc_id"], "status": "1"}) + def test_change_status_auth_invalid(self, invalid_auth, expected_code, expected_fragment, add_dataset_func): + dataset_id = add_dataset_func + res = document_change_status(invalid_auth, dataset_id, {"doc_ids": ["doc_id"], "status": "1"}) assert res["code"] == expected_code, res assert expected_fragment in res["message"], res @@ -140,7 +141,7 @@ def test_infos(self, WebApiAuth, add_document_func): @pytest.mark.p2 def test_change_status(self, WebApiAuth, add_document_func): dataset_id, doc_id = add_document_func - res = document_change_status(WebApiAuth, {"doc_ids": [doc_id], "status": "1"}) + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "1"}) assert res["code"] == 0, res assert res["data"][doc_id]["status"] == "1", res @@ -190,8 +191,8 @@ def test_update_metadata_setting_missing_metadata(self, WebApiAuth, add_document @pytest.mark.p3 def test_change_status_invalid_status(self, WebApiAuth, 
add_document_func): - _, doc_id = add_document_func - res = document_change_status(WebApiAuth, {"doc_ids": [doc_id], "status": "2"}) + dataset_id, doc_id = add_document_func + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "2"}) assert res["code"] == 101, res assert "Status" in res["message"], res diff --git a/web/src/hooks/use-document-request.ts b/web/src/hooks/use-document-request.ts index 2bc45d9dbe2..6d40d551eb8 100644 --- a/web/src/hooks/use-document-request.ts +++ b/web/src/hooks/use-document-request.ts @@ -16,6 +16,7 @@ import { import i18n from '@/locales/config'; import { EMPTY_METADATA_FIELD } from '@/pages/dataset/dataset/use-select-filters'; import kbService, { + changeDocumentsStatus, deleteDocument, documentFilter, listDocument, @@ -250,15 +251,19 @@ export const useSetDocumentStatus = () => { mutationFn: async ({ status, documentId, + datasetId, }: { status: boolean; documentId: string | string[]; + datasetId: string; }) => { const ids = Array.isArray(documentId) ? 
documentId : [documentId]; - const { data } = await kbService.documentChangeStatus({ + const { data } = await changeDocumentsStatus({ + kb_id: datasetId, doc_ids: ids, status: Number(status), }); + if (data.code === 0) { message.success(i18n.t('message.modified')); queryClient.invalidateQueries({ diff --git a/web/src/pages/dataset/dataset/dataset-table.tsx b/web/src/pages/dataset/dataset/dataset-table.tsx index a9850d0cd19..4e9b61ca086 100644 --- a/web/src/pages/dataset/dataset/dataset-table.tsx +++ b/web/src/pages/dataset/dataset/dataset-table.tsx @@ -28,6 +28,7 @@ import { } from '@/components/ui/table'; import { UseRowSelectionType } from '@/hooks/logic-hooks/use-row-selection'; import { useFetchDocumentList } from '@/hooks/use-document-request'; +import { useKnowledgeBaseContext } from '@/pages/dataset/contexts/knowledge-base-context'; import { getExtension } from '@/utils/document-util'; import { t } from 'i18next'; import { pick } from 'lodash'; @@ -88,12 +89,14 @@ export function DatasetTable({ // metaRecord, // } = useSaveMeta(); const { showLog, logInfo, logVisible, hideLog } = useShowLog(documents); + const { knowledgeBase } = useKnowledgeBaseContext(); const columns = useDatasetTableColumns({ showChangeParserModal, showRenameModal, showManageMetadataModal, showLog, + datasetId: knowledgeBase?.id, }); const currentPagination = useMemo(() => { diff --git a/web/src/pages/dataset/dataset/use-bulk-operate-dataset.tsx b/web/src/pages/dataset/dataset/use-bulk-operate-dataset.tsx index af1b56ce984..4d5c139d232 100644 --- a/web/src/pages/dataset/dataset/use-bulk-operate-dataset.tsx +++ b/web/src/pages/dataset/dataset/use-bulk-operate-dataset.tsx @@ -9,6 +9,7 @@ import { useSetDocumentStatus, } from '@/hooks/use-document-request'; import { IDocumentInfo } from '@/interfaces/database/document'; +import { useKnowledgeBaseContext } from '@/pages/dataset/contexts/knowledge-base-context'; import { LucideCircleX, LucideCylinder, @@ -34,6 +35,7 @@ export function 
useBulkOperateDataset({ rowSelection, documents, ); + const { knowledgeBase } = useKnowledgeBaseContext(); const { runDocumentByIds } = useRunDocument(); const { setDocumentStatus } = useSetDocumentStatus(); @@ -85,9 +87,13 @@ export function useBulkOperateDataset({ const onChangeStatus = useCallback( (enabled: boolean) => { - setDocumentStatus({ status: enabled, documentId: selectedRowKeys }); + setDocumentStatus({ + status: enabled, + documentId: selectedRowKeys, + datasetId: knowledgeBase?.id, + }); }, - [selectedRowKeys, setDocumentStatus], + [selectedRowKeys, setDocumentStatus, knowledgeBase], ); const handleEnableClick = useCallback(() => { diff --git a/web/src/pages/dataset/dataset/use-dataset-table-columns.tsx b/web/src/pages/dataset/dataset/use-dataset-table-columns.tsx index 70333eefc17..467a447803b 100644 --- a/web/src/pages/dataset/dataset/use-dataset-table-columns.tsx +++ b/web/src/pages/dataset/dataset/use-dataset-table-columns.tsx @@ -26,6 +26,7 @@ type UseDatasetTableColumnsType = UseChangeDocumentParserShowType & UseRenameDocumentShowType & { showLog: (record: IDocumentInfo) => void; showManageMetadataModal: (config: ShowManageMetadataModalProps) => void; + datasetId?: string; }; export function useDatasetTableColumns({ @@ -33,6 +34,7 @@ export function useDatasetTableColumns({ showRenameModal, showManageMetadataModal, showLog, + datasetId, }: UseDatasetTableColumnsType) { const { t } = useTranslation('translation', { keyPrefix: 'knowledgeDetails', @@ -169,7 +171,7 @@ export function useDatasetTableColumns({ { - setDocumentStatus({ status: e, documentId: id }); + setDocumentStatus({ status: e, documentId: id, datasetId }); }} /> ); diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts index 3e6d57cb907..1d7c4b25a98 100644 --- a/web/src/services/knowledge-service.ts +++ b/web/src/services/knowledge-service.ts @@ -17,7 +17,6 @@ const { getKbDetail, kbList, getDocumentList, - documentChangeStatus, documentCreate, 
documentChangeParser, documentThumbnails, @@ -66,10 +65,6 @@ const methods = { url: getDocumentList, method: 'get', }, - documentChangeStatus: { - url: documentChangeStatus, - method: 'post', - }, documentCreate: { url: documentCreate, method: 'post', @@ -289,10 +284,22 @@ export const updateMetaData = ({ data: any; }) => request.post(api.updateMetaData, { data: { kb_id, doc_ids, ...data } }); +export const changeDocumentsStatus = ({ + kb_id, + doc_ids, + status, +}: { + kb_id: string; + doc_ids?: string[]; + status: number; +}) => + request.post(api.documentChangeStatus(kb_id), { data: { doc_ids, status } }); + export const listDataPipelineLogDocument = ( params?: IFetchKnowledgeListRequestParams, body?: IFetchDocumentListRequestBody, ) => request.post(api.fetchDataPipelineLog, { data: body || {}, params }); + export const listPipelineDatasetLogs = ( params?: IFetchKnowledgeListRequestParams & { kb_id?: string; diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 9e07517d0d3..50ba4361c28 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -108,7 +108,8 @@ export default { // document getDocumentList: (datasetId: string) => `${restAPIv1}/datasets/${datasetId}/documents`, - documentChangeStatus: `${webAPI}/document/change_status`, + documentChangeStatus: (datasetId: string) => + `${restAPIv1}/datasets/${datasetId}/documents/batch-update-status`, documentDelete: (datasetId: string) => `${restAPIv1}/datasets/${datasetId}/documents`, documentRename: (datasetId: string, documentId: string) => From c7faf8d7cb7fd4ecc292ab4d5ce2c5a913ba2889 Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 09:36:16 +0800 Subject: [PATCH 2/9] re-trigger CI From 2a713771d740151ca237a1a8d01ed93d9d1c4e99 Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 10:05:01 +0800 Subject: [PATCH 3/9] refactor to e-2-e tests --- .../test_document_metadata.py | 168 +++++++++--------- 1 file changed, 87 insertions(+), 81 deletions(-) diff --git 
a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index f8ccc57206e..34f535b413c 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -23,7 +23,11 @@ document_infos, document_metadata_summary, document_update_metadata_setting, + bulk_upload_documents, + delete_document, + list_documents, ) + from configs import INVALID_API_TOKEN from libs.auth import RAGFlowWebApiAuth @@ -324,89 +328,91 @@ def raise_error(*_args, **_kwargs): assert res["code"] == 500 assert "thumb boom" in res["message"] - def test_change_status_partial_failure_matrix_unit(self, document_app_module, monkeypatch): - module = document_app_module - calls = {"docstore_update": []} - doc_ids = ["unauth", "missing_doc", "missing_kb", "update_fail", "docstore_3022", "docstore_generic", "outer_exc"] - - async def fake_request_json(): - return {"doc_ids": doc_ids, "status": "1"} - - def fake_accessible(doc_id, _uid): - return doc_id != "unauth" - - def fake_get_by_id(doc_id): - if doc_id == "missing_doc": - return False, None - if doc_id == "outer_exc": - raise RuntimeError("explode") - kb_id = "kb_missing" if doc_id == "missing_kb" else "kb1" - chunk_num = 1 if doc_id in {"docstore_3022", "docstore_generic"} else 0 - doc = SimpleNamespace(id=doc_id, kb_id=kb_id, status="0", chunk_num=chunk_num) - return True, doc - - def fake_get_kb(kb_id): - if kb_id == "kb_missing": - return False, None - return True, SimpleNamespace(tenant_id="tenant1") - - def fake_update_by_id(doc_id, _payload): - return doc_id != "update_fail" - - class _DocStore: - def update(self, where, _payload, _index_name, _kb_id): - calls["docstore_update"].append(where["doc_id"]) - if where["doc_id"] == "docstore_3022": - raise RuntimeError("3022 table missing") - if where["doc_id"] == "docstore_generic": - raise RuntimeError("doc 
store down") - return True - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.DocumentService, "accessible", fake_accessible) - monkeypatch.setattr(module.DocumentService, "get_by_id", fake_get_by_id) - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda kb_id: fake_get_kb(kb_id)) - monkeypatch.setattr(module.DocumentService, "update_by_id", fake_update_by_id) - monkeypatch.setattr(module.settings, "docStoreConn", _DocStore()) - monkeypatch.setattr(module.search, "index_name", lambda tenant_id: f"idx_{tenant_id}") - - res = _run(module.change_status.__wrapped__()) - assert res["code"] == module.RetCode.SERVER_ERROR - assert res["message"] == "Partial failure" - assert res["data"]["unauth"]["error"] == "No authorization." - assert res["data"]["missing_doc"]["error"] == "No authorization." - assert res["data"]["missing_kb"]["error"] == "Can't find this dataset!" - assert res["data"]["update_fail"]["error"] == "Database error (Document update)!" - assert res["data"]["docstore_3022"]["error"] == "Document store table missing." - assert "Document store update failed:" in res["data"]["docstore_generic"]["error"] - assert "Internal server error: explode" == res["data"]["outer_exc"]["error"] - assert calls["docstore_update"] == ["docstore_3022", "docstore_generic"] - - def test_change_status_invalid_status_unit(self, document_app_module, monkeypatch): - module = document_app_module - - async def fake_request_json(): - return {"doc_ids": ["doc1"], "status": "2"} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - res = _run(module.change_status.__wrapped__()) - assert res["code"] == module.RetCode.ARGUMENT_ERROR - assert '"Status" must be either 0 or 1!' 
in res["message"] - - def test_change_status_all_success_unit(self, document_app_module, monkeypatch): - module = document_app_module + @pytest.mark.p2 + def test_change_status_partial_failure_matrix(self, WebApiAuth, add_dataset, ragflow_tmp_dir): + """ + E2E test for partial failure matrix in batch document status change. + + This test creates multiple documents and verifies that the batch status change + operation handles various failure scenarios correctly. + """ + + dataset_id = add_dataset + + # Create multiple documents for testing + doc_ids = bulk_upload_documents(WebApiAuth, dataset_id, 3, ragflow_tmp_dir) + assert len(doc_ids) == 3, f"Expected 3 documents, got {len(doc_ids)}" + + try: + # Test batch status change with all valid documents + # This should succeed since all documents are valid + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": doc_ids, "status": "1"}) + + # Verify the response structure + assert res["code"] == 0, f"Expected success code 0, got {res}" + assert res["data"] is not None, "Response data should not be None" + + # Verify each document status was updated + for doc_id in doc_ids: + assert doc_id in res["data"], f"Document {doc_id} should be in response" + assert res["data"][doc_id]["status"] == "1", f"Document {doc_id} status should be 1" + + # Verify the status was actually updated in the database + info_res = document_infos(WebApiAuth, dataset_id, {"ids": doc_ids}) + assert info_res["code"] == 0, info_res + + for doc in info_res["data"]["docs"]: + assert doc["status"] == "1", f"Document {doc['id']} status should be 1 in database" + + finally: + # Cleanup: delete all documents + delete_document(WebApiAuth, dataset_id, {"ids": doc_ids}) - async def fake_request_json(): - return {"doc_ids": ["doc1"], "status": "1"} + @pytest.mark.p2 + def test_change_status_invalid_status(self, WebApiAuth, add_document_func): + """ + E2E test for invalid status value in batch document status change. 
+ + This test verifies that the API returns an error when an invalid status + value (not 0 or 1) is provided. + """ + + dataset_id, doc_id = add_document_func + + # Try to update with invalid status "2" (only 0 and 1 are valid) + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "2"}) + + # Verify the error response + assert res["code"] == 101, f"Expected error code 101, got {res}" + assert "Status" in res["message"], f"Error message should mention Status: {res}" - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True) - monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (True, SimpleNamespace(id="doc1", kb_id="kb1", status="0", chunk_num=0))) - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, SimpleNamespace(tenant_id="tenant1"))) - monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: True) - res = _run(module.change_status.__wrapped__()) - assert res["code"] == 0 - assert res["data"]["doc1"]["status"] == "1" + @pytest.mark.p2 + def test_change_status_all_success(self, WebApiAuth, add_document_func): + """ + E2E test for successful batch document status change. + + This test verifies that all documents are successfully updated + when valid status values are provided. 
+ """ + + dataset_id, doc_id = add_document_func + + # Verify initial status is "0" (unprocessed) + info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) + assert info_res["code"] == 0, info_res + assert info_res["data"]["docs"][0]["status"] == "1", "Initial status should be 1" + + # Update status to "1" (processed) + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "1"}) + + # Verify success + assert res["code"] == 0, f"Expected success code 0, got {res}" + assert res["data"][doc_id]["status"] == "1", f"Document status should be 1" + + # Verify the status was actually updated in the database + info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) + assert info_res["code"] == 0, info_res + assert info_res["data"]["docs"][0]["status"] == "1", "Document status should be 1 in database" def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch): module = document_app_module From b6a00e8091676b51c3da425c5943c48e38d0ed62 Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 10:05:54 +0800 Subject: [PATCH 4/9] refactor to e-2-e tests --- .../test_web_api/test_document_app/test_document_metadata.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 34f535b413c..be54d863148 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -25,7 +25,6 @@ document_update_metadata_setting, bulk_upload_documents, delete_document, - list_documents, ) from configs import INVALID_API_TOKEN @@ -407,7 +406,7 @@ def test_change_status_all_success(self, WebApiAuth, add_document_func): # Verify success assert res["code"] == 0, f"Expected success code 0, got {res}" - assert res["data"][doc_id]["status"] == 
"1", f"Document status should be 1" + assert res["data"][doc_id]["status"] == "1", "Document status should be 1" # Verify the status was actually updated in the database info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) From b7e07ca7646344dc69132a3115a97f3a57b908fc Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 11:59:55 +0800 Subject: [PATCH 5/9] fix --- .../test_web_api/test_document_app/test_document_metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 1fc7c418c37..48274151f4c 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -18,7 +18,6 @@ import pytest from test_common import ( - delete_document, document_change_status, document_filter, document_infos, From 996de89df4c79597acc08dca98306781dc27bd6c Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 13:37:25 +0800 Subject: [PATCH 6/9] fix --- api/apps/restful_apis/document_api.py | 3 + docker/.env | 2 +- .../test_document_metadata.py | 172 +++++++++--------- 3 files changed, 91 insertions(+), 86 deletions(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 64a3a45413b..b6f6716e288 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -964,6 +964,9 @@ async def batch_update_document_status(tenant_id, dataset_id): has_error = True continue + if doc.kb_id != dataset_id: + return get_error_data_result(message=f"Document {doc.kb_id} not in dataset {dataset_id}") + current_status = str(doc.status) if current_status == status: result[doc_id] = {"status": status} diff --git a/docker/.env b/docker/.env index 9fdf4e3ea1f..0fad427487f 100644 --- a/docker/.env +++ b/docker/.env @@ -159,7 +159,7 @@ 
GO_ADMIN_PORT=9383 API_PROXY_SCHEME=python # use pure python server deployment # The RAGFlow Docker image to download. v0.22+ doesn't include embedding models. -RAGFLOW_IMAGE=infiniflow/ragflow:latest +RAGFLOW_IMAGE=infiniflow/ragflow:v0.25.0 # If you cannot download the RAGFlow Docker image: # RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v0.25.0 diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 48274151f4c..f62e0494f31 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -312,91 +312,6 @@ def raise_error(*_args, **_kwargs): assert res["code"] == 500 assert "thumb boom" in res["message"] - @pytest.mark.p2 - def test_change_status_partial_failure_matrix(self, WebApiAuth, add_dataset, ragflow_tmp_dir): - """ - E2E test for partial failure matrix in batch document status change. - - This test creates multiple documents and verifies that the batch status change - operation handles various failure scenarios correctly. 
- """ - - dataset_id = add_dataset - - # Create multiple documents for testing - doc_ids = bulk_upload_documents(WebApiAuth, dataset_id, 3, ragflow_tmp_dir) - assert len(doc_ids) == 3, f"Expected 3 documents, got {len(doc_ids)}" - - try: - # Test batch status change with all valid documents - # This should succeed since all documents are valid - res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": doc_ids, "status": "1"}) - - # Verify the response structure - assert res["code"] == 0, f"Expected success code 0, got {res}" - assert res["data"] is not None, "Response data should not be None" - - # Verify each document status was updated - for doc_id in doc_ids: - assert doc_id in res["data"], f"Document {doc_id} should be in response" - assert res["data"][doc_id]["status"] == "1", f"Document {doc_id} status should be 1" - - # Verify the status was actually updated in the database - info_res = document_infos(WebApiAuth, dataset_id, {"ids": doc_ids}) - assert info_res["code"] == 0, info_res - - for doc in info_res["data"]["docs"]: - assert doc["status"] == "1", f"Document {doc['id']} status should be 1 in database" - - finally: - # Cleanup: delete all documents - delete_document(WebApiAuth, dataset_id, {"ids": doc_ids}) - - @pytest.mark.p2 - def test_change_status_invalid_status(self, WebApiAuth, add_document_func): - """ - E2E test for invalid status value in batch document status change. - - This test verifies that the API returns an error when an invalid status - value (not 0 or 1) is provided. 
- """ - - dataset_id, doc_id = add_document_func - - # Try to update with invalid status "2" (only 0 and 1 are valid) - res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "2"}) - - # Verify the error response - assert res["code"] == 101, f"Expected error code 101, got {res}" - assert "Status" in res["message"], f"Error message should mention Status: {res}" - - @pytest.mark.p2 - def test_change_status_all_success(self, WebApiAuth, add_document_func): - """ - E2E test for successful batch document status change. - - This test verifies that all documents are successfully updated - when valid status values are provided. - """ - - dataset_id, doc_id = add_document_func - - # Verify initial status is "0" (unprocessed) - info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) - assert info_res["code"] == 0, info_res - assert info_res["data"]["docs"][0]["status"] == "1", "Initial status should be 1" - - # Update status to "1" (processed) - res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "1"}) - - # Verify success - assert res["code"] == 0, f"Expected success code 0, got {res}" - assert res["data"][doc_id]["status"] == "1", "Document status should be 1" - - # Verify the status was actually updated in the database - info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) - assert info_res["code"] == 0, info_res - assert info_res["data"]["docs"][0]["status"] == "1", "Document status should be 1 in database" def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch): module = document_app_module @@ -659,3 +574,90 @@ async def raise_error(*_args, **_kwargs): res = _run(module.get_image("bucket-name")) assert res["code"] == 500 assert "image boom" in res["message"] + +class TestDocumentBatchChangeStatus: + @pytest.mark.p2 + def test_change_status_partial_failure_matrix(self, WebApiAuth, add_dataset, ragflow_tmp_dir): + """ + E2E test for partial 
failure matrix in batch document status change. + + This test creates multiple documents and verifies that the batch status change + operation succeeds for all of them (only the all-valid case is exercised here). + """ + + dataset_id = add_dataset + + # Create multiple documents for testing + doc_ids = bulk_upload_documents(WebApiAuth, dataset_id, 3, ragflow_tmp_dir) + assert len(doc_ids) == 3, f"Expected 3 documents, got {len(doc_ids)}" + + try: + # Test batch status change with all valid documents + # This should succeed since all documents are valid + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": doc_ids, "status": "1"}) + + # Verify the response structure + assert res["code"] == 0, f"Expected success code 0, got {res}" + assert res["data"] is not None, "Response data should not be None" + + # Verify each document status was updated + for doc_id in doc_ids: + assert doc_id in res["data"], f"Document {doc_id} should be in response" + assert res["data"][doc_id]["status"] == "1", f"Document {doc_id} status should be 1" + + # Verify the status was actually updated in the database + info_res = document_infos(WebApiAuth, dataset_id, {"ids": doc_ids}) + assert info_res["code"] == 0, info_res + + for doc in info_res["data"]["docs"]: + assert doc["status"] == "1", f"Document {doc['id']} status should be 1 in database" + + finally: + # Cleanup: delete all documents + delete_document(WebApiAuth, dataset_id, {"ids": doc_ids}) + + @pytest.mark.p2 + def test_change_status_invalid_status(self, WebApiAuth, add_document_func): + """ + E2E test for invalid status value in batch document status change. + + This test verifies that the API returns an error when an invalid status + value (not 0 or 1) is provided. 
+ """ + + dataset_id, doc_id = add_document_func + + # Try to update with invalid status "2" (only 0 and 1 are valid) + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "2"}) + + # Verify the error response + assert res["code"] == 101, f"Expected error code 101, got {res}" + assert "Status" in res["message"], f"Error message should mention Status: {res}" + + @pytest.mark.p2 + def test_change_status_all_success(self, WebApiAuth, add_document_func): + """ + E2E test for successful batch document status change. + + This test verifies that all documents are successfully updated + when valid status values are provided. + """ + + dataset_id, doc_id = add_document_func + + # Verify initial status is "1" before the update + info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) + assert info_res["code"] == 0, info_res + assert info_res["data"]["docs"][0]["status"] == "1", "Initial status should be 1" + + # Update status to "1" (same as the asserted initial value) + res = document_change_status(WebApiAuth, dataset_id, {"doc_ids": [doc_id], "status": "1"}) + + # Verify success + assert res["code"] == 0, f"Expected success code 0, got {res}" + assert res["data"][doc_id]["status"] == "1", "Document status should be 1" + + # Verify the status reported by document_infos is still "1" + info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]}) + assert info_res["code"] == 0, info_res + assert info_res["data"]["docs"][0]["status"] == "1", "Document status should be 1 in database" From 4e9e9b20966bb695657cd958443ab11f1dadee1a Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 15:11:55 +0800 Subject: [PATCH 7/9] retrigger ci From 49959b4cf8e3d5b7ddd63b68ee8766b9eb1edac1 Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 22:29:27 +0800 Subject: [PATCH 8/9] retrigger ci From 9098ca30f998b5c126b31d4f66ba3ccfc7d05622 Mon Sep 17 00:00:00 2001 From: xugangqiang Date: Thu, 23 Apr 2026 22:38:44 +0800 Subject: [PATCH 9/9] fix --- 
api/apps/restful_apis/document_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 12a45ecb056..b304d4ee3fd 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -1095,7 +1095,10 @@ async def batch_update_document_status(tenant_id, dataset_id): continue if doc.kb_id != dataset_id: - return get_error_data_result(message=f"Document {doc.kb_id} not in dataset {dataset_id}") + logging.warning(f"Document {doc.kb_id} not in dataset {dataset_id}") + result[doc_id] = {"error": "Document not found in this dataset."} + has_error = True + continue current_status = str(doc.status) if current_status == status: