diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 766430a8ba4..429de7be45e 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -18,7 +18,6 @@
 from quart import make_response, request
 
 from api.apps import current_user, login_required
-from api.constants import IMG_BASE64_PREFIX
 from api.db import FileType
 from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
@@ -36,25 +35,6 @@
 from rag.nlp import search
 
 
-@manager.route("/thumbnails", methods=["GET"])  # noqa: F821
-# @login_required
-def thumbnails():
-    doc_ids = request.args.getlist("doc_ids")
-    if not doc_ids:
-        return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)
-
-    try:
-        docs = DocumentService.get_thumbnails(doc_ids)
-
-        for doc_item in docs:
-            if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
-                doc_item["thumbnail"] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}"
-
-        return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
-    except Exception as e:
-        return server_error_response(e)
-
-
 @manager.route("/get/<doc_id>", methods=["GET"])  # noqa: F821
 @login_required
 async def get(doc_id):
@@ -147,19 +127,3 @@ def reset_doc():
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
-
-
-@manager.route("/image/<image_id>", methods=["GET"])  # noqa: F821
-# @login_required
-async def get_image(image_id):
-    try:
-        arr = image_id.split("-")
-        if len(arr) != 2:
-            return get_data_error_result(message="Image not found.")
-        bkt, nm = image_id.split("-")
-        data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
-        response = await make_response(data)
-        response.headers.set("Content-Type", "image/JPEG")
-        return response
-    except Exception as e:
-        return server_error_response(e)
diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py
index 4ad8e68f86d..f9687bfea5b 100644
--- a/api/apps/restful_apis/document_api.py
+++ b/api/apps/restful_apis/document_api.py
@@ -719,7 +719,7 @@ def list_docs(dataset_id, tenant_id):
     renamed_doc_list = [map_doc_keys(doc) for doc in docs]
     for doc_item in renamed_doc_list:
         if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
-            doc_item["thumbnail"] = f"/v1/document/image/{dataset_id}-{doc_item['thumbnail']}"
+            doc_item["thumbnail"] = f"/api/v1/documents/images/{dataset_id}-{doc_item['thumbnail']}"
         if doc_item.get("source_type"):
             doc_item["source_type"] = doc_item["source_type"].split("/")[0]
         if doc_item["parser_config"].get("metadata"):
@@ -1168,6 +1168,47 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):
     return get_result(data=doc.to_dict())
 
 
+@manager.route("/thumbnails", methods=["GET"])  # noqa: F821
+def list_thumbnails():
+    """
+    Get thumbnails for documents.
+    ---
+    tags:
+      - Documents
+    parameters:
+      - in: query
+        name: doc_ids
+        type: array
+        items:
+          type: string
+        collectionFormat: multi
+        required: true
+        description: List of document IDs to get thumbnails for.
+    responses:
+      200:
+        description: Successfully retrieved thumbnails
+      400:
+        description: Missing document IDs
+    """
+    from api.constants import IMG_BASE64_PREFIX
+    from api.db.services.document_service import DocumentService
+
+    doc_ids = request.args.getlist("doc_ids")
+    if not doc_ids:
+        return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)
+
+    try:
+        docs = DocumentService.get_thumbnails(doc_ids)
+
+        for doc_item in docs:
+            if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
+                doc_item["thumbnail"] = f"/api/v1/documents/images/{doc_item['kb_id']}-{doc_item['thumbnail']}"
+
+        return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
+    except Exception as e:
+        return server_error_response(e)
+
+
 @manager.route("/datasets/<dataset_id>/documents/metadatas", methods=["PATCH"])  # noqa: F821
 @login_required
 @add_tenant_id_to_kwargs
@@ -1581,6 +1622,42 @@ def _run_sync():
         return get_error_data_result(message="Internal server error")
 
 
+@manager.route("/documents/images/<image_id>", methods=["GET"])  # noqa: F821
+async def get_document_image(image_id):
+    """
+    Get a document image by ID.
+    ---
+    tags:
+      - Documents
+    parameters:
+      - name: image_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: "The image ID in the form <bucket>-<name>, with exactly one hyphen separator"
+    responses:
+      200:
+        description: Image file
+        content:
+          image/jpeg:
+            schema:
+              type: string
+              format: binary
+    """
+    try:
+        arr = image_id.split("-")
+        if len(arr) != 2:
+            return get_data_error_result(message="Image not found.")
+        bkt, nm = arr
+        data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
+        response = await make_response(data)
+        response.headers.set("Content-Type", "image/JPEG")
+        return response
+    except Exception as e:
+        return server_error_response(e)
+
+
 ARTIFACT_CONTENT_TYPES = {
     ".png": "image/png",
     ".jpg": "image/jpeg",
diff --git a/test/testcases/test_web_api/test_common.py b/test/testcases/test_web_api/test_common.py
index 8d687f02889..cfe9c1ce638 100644
--- a/test/testcases/test_web_api/test_common.py
+++ b/test/testcases/test_web_api/test_common.py
@@ -451,6 +451,17 @@ def document_change_status(auth, dataset_id, payload=None, *, headers=HEADERS, d
     return res.json()
 
 
+def document_thumbnails(auth, params=None, *, headers=HEADERS, data=None):
+    """Get document thumbnails.
+
+    Args:
+        auth: Authentication object
+        params: Query parameters (e.g., {"doc_ids": ["doc1", "doc2"]})
+    """
+    res = requests.get(url=f"{HOST_ADDRESS}/api/v1/thumbnails", params=params, headers=headers, auth=auth, data=data)
+    return res.json()
+
+
 def bulk_upload_documents(auth, kb_id, num, tmp_path):
     fps = []
     for i in range(num):
diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py
index 0be70e5bfd7..6e77983e9a1 100644
--- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py
+++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py
@@ -288,37 +288,6 @@ def test_update_metadata_invalid_delete_item(self, WebApiAuth, add_document_func
         assert "Each delete requires key" in res["message"], res
 
 
-    def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch):
-        module = document_app_module
-        monkeypatch.setattr(module, "request", _DummyRequest(args={}))
-        res = module.thumbnails()
-        assert res["code"] == module.RetCode.ARGUMENT_ERROR
-        assert 'Lack of "Document ID"' in res["message"]
-
-        monkeypatch.setattr(module, "request", _DummyRequest(args={"doc_ids": ["doc1", "doc2"]}))
-        monkeypatch.setattr(
-            module.DocumentService,
-            "get_thumbnails",
-            lambda _doc_ids: [
-                {"id": "doc1", "kb_id": "kb1", "thumbnail": "thumb.jpg"},
-                {"id": "doc2", "kb_id": "kb1", "thumbnail": f"{module.IMG_BASE64_PREFIX}blob"},
-            ],
-        )
-        res = module.thumbnails()
-        assert res["code"] == 0
-        assert res["data"]["doc1"] == "/v1/document/image/kb1-thumb.jpg"
-        assert res["data"]["doc2"] == f"{module.IMG_BASE64_PREFIX}blob"
-
-        def raise_error(*_args, **_kwargs):
-            raise RuntimeError("thumb boom")
-
-        monkeypatch.setattr(module.DocumentService, "get_thumbnails", raise_error)
-        monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
-        res = module.thumbnails()
-        assert res["code"] == 500
-        assert "thumb boom" in res["message"]
-
-
     def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch):
         module = document_app_module
         monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
@@ -546,6 +515,7 @@ def raise_parser_config(*_args, **_kwargs):
         assert res["code"] == 500
         assert "parser boom" in res["message"]
 
 
+    @pytest.mark.skip(reason="Moved to /api/v1/documents/images/<image_id>")
     def test_get_image_success_and_exception_unit(self, document_app_module, monkeypatch):
         module = document_app_module
diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts
index a2551d1daa4..c2f19d97e57 100644
--- a/web/src/utils/api.ts
+++ b/web/src/utils/api.ts
@@ -123,7 +123,7 @@ export default {
   documentCreate: (datasetId: string) =>
     `${restAPIv1}/datasets/${datasetId}/documents?type=empty`,
   documentChangeParser: `${webAPI}/document/change_parser`,
-  documentThumbnails: `${webAPI}/document/thumbnails`,
+  documentThumbnails: `${restAPIv1}/thumbnails`,
   getDocumentFile: `${webAPI}/document/get`,
   getDocumentFileDownload: (docId: string) =>
     `${webAPI}/document/download/${docId}`,