Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 90 additions & 12 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,7 @@ def list_docs(dataset_id, tenant_id):
renamed_doc_list = [map_doc_keys(doc) for doc in payload]
for doc_item in renamed_doc_list:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/api/v1/documents/images/{dataset_id}-{doc_item['thumbnail']}"
doc_item["thumbnail"] = f"/api/v1/documents/{doc_item['id']}/thumbnail"
if doc_item.get("source_type"):
doc_item["source_type"] = doc_item["source_type"].split("/")[0]
if doc_item["parser_config"].get("metadata"):
Expand Down Expand Up @@ -1165,23 +1165,33 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):


@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
@login_required
Copy link
Copy Markdown
Collaborator

@KevinHuSh KevinHuSh May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this way, image loading will fail for all dialogs embedded in an <iframe>. (The dialogs can be embedded in an iframe.)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean this might be removed.

def list_thumbnails():
"""
Get thumbnails for documents.
Get thumbnails for documents the caller can access.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: query
name: doc_ids
type: array
required: true
description: List of document IDs to get thumbnails for.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
responses:
200:
description: Successfully retrieved thumbnails
description: Successfully retrieved thumbnails. Inaccessible IDs are
silently filtered out so the response cannot be used to
enumerate cross-tenant document IDs.
400:
description: Missing document IDs
description: Missing document IDs.
"""
from api.constants import IMG_BASE64_PREFIX
from api.db.services.document_service import DocumentService
Expand All @@ -1191,11 +1201,15 @@ def list_thumbnails():
return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)

try:
docs = DocumentService.get_thumbnails(doc_ids)
accessible_ids = [doc_id for doc_id in doc_ids if DocumentService.accessible(doc_id, current_user.id)]
if not accessible_ids:
return get_json_result(data={})

docs = DocumentService.get_thumbnails(accessible_ids)

for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/api/v1/documents/images/{doc_item['kb_id']}-{doc_item['thumbnail']}"
doc_item["thumbnail"] = f"/api/v1/documents/{doc_item['id']}/thumbnail"

return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
except Exception as e:
Expand Down Expand Up @@ -1615,20 +1629,30 @@ def _run_sync():
return get_error_data_result(message="Internal server error")


# Chunk image keys are xxhash64 hex digests (16 chars) generated in
# rag/svr/task_executor.py. Restricting the storage key to this shape
# prevents the endpoint from being coerced into serving arbitrary objects
# (e.g. raw documents) that happen to share the bucket.
_CHUNK_IMAGE_KEY_RE = re.compile(r"^[0-9a-f]{16}$")


@manager.route("/documents/images/<image_id>", methods=["GET"]) # noqa: F821
@login_required
async def get_document_image(image_id):
"""
Get a document image by ID.
Get a chunk reference image by ID.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- name: image_id
in: path
required: true
schema:
type: string
description: The image ID (format: bucket-name-image-name)
description: The chunk image ID (format: kb_id-chunk_xxhash)
responses:
200:
description: Image file
Expand All @@ -1639,18 +1663,72 @@ async def get_document_image(image_id):
format: binary
"""
try:
arr = image_id.split("-")
if len(arr) != 2:
kb_id, sep, key = image_id.rpartition("-")
if not sep or not kb_id:
logging.warning("get_document_image: malformed image_id=%r user_id=%s", image_id, current_user.id)
return get_data_error_result(message="Image not found.")
if not _CHUNK_IMAGE_KEY_RE.match(key):
logging.warning("get_document_image: invalid key shape image_id=%s user_id=%s", image_id, current_user.id)
return get_data_error_result(message="Image not found.")
bkt, nm = image_id.split("-")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
if not KnowledgebaseService.accessible(kb_id, current_user.id):
logging.warning("get_document_image: access denied image_id=%s user_id=%s", image_id, current_user.id)
return get_data_error_result(message="No authorization.")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, kb_id, key)
response = await make_response(data)
response.headers.set("Content-Type", "image/JPEG")
return response
except Exception as e:
return server_error_response(e)


@manager.route("/documents/<doc_id>/thumbnail", methods=["GET"])  # noqa: F821
@login_required
async def get_document_thumbnail(doc_id):
    """
    Get a document's thumbnail image.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - name: doc_id
        in: path
        required: true
        schema:
          type: string
        description: The document ID.
    responses:
      200:
        description: Thumbnail image
        content:
          image/png:
            schema:
              type: string
              format: binary
    """
    try:
        caller_id = current_user.id

        # Authorization is checked first, before the document record is looked up.
        if not DocumentService.accessible(doc_id, caller_id):
            logging.warning("get_document_thumbnail: access denied doc_id=%s user_id=%s", doc_id, caller_id)
            return get_data_error_result(message="No authorization.")

        # get_by_id returns a (found, record) pair; the flag is kept under a
        # name distinct from the exception variable used in the handler below.
        found, document = DocumentService.get_by_id(doc_id)
        if not found:
            logging.warning("get_document_thumbnail: document not found doc_id=%s", doc_id)
            return get_data_error_result(message="Document not found.")

        # Storage key shape mirrors the producer in api/db/services/file_service.py.
        storage_key = f"thumbnail_{document.id}.png"

        # The storage read is dispatched through thread_pool_exec and awaited;
        # the document's kb_id is passed as the first argument to STORAGE_IMPL.get.
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, document.kb_id, storage_key)
        if not blob:
            logging.info("get_document_thumbnail: missing thumbnail doc_id=%s key=%s", document.id, storage_key)
            return get_data_error_result(message="Thumbnail not found.")

        reply = await make_response(blob)
        reply.headers.set("Content-Type", "image/png")
        return reply
    except Exception as err:
        # Route-level boundary: log with traceback, then return the standard error envelope.
        logging.exception("get_document_thumbnail failed doc_id=%s", doc_id)
        return server_error_response(err)


ARTIFACT_CONTENT_TYPES = {
".png": "image/png",
".jpg": "image/jpeg",
Expand Down
10 changes: 9 additions & 1 deletion test/testcases/restful_api/test_document_raw_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,16 @@


@pytest.mark.p2
def test_document_image_invalid_id_contract(rest_client_noauth):
def test_document_image_requires_auth(rest_client_noauth):
res = rest_client_noauth.get("/documents/images/not-a-valid-image-id")
assert res.status_code == 401
payload = res.json()
assert payload["code"] == 401, payload


@pytest.mark.p2
def test_document_image_invalid_id_contract(rest_client):
res = rest_client.get("/documents/images/not-a-valid-image-id")
assert res.status_code == 200
payload = res.json()
assert payload["code"] == 102, payload
Expand Down
5 changes: 5 additions & 0 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,11 @@ def document_get(auth, document_id, *, headers=HEADERS, data=None):
return res


def document_thumbnail(auth, document_id, *, headers=HEADERS, data=None):
    """Send a GET request to a document's thumbnail endpoint.

    Args:
        auth: Passed through unchanged as the ``auth`` argument of ``requests.get``.
        document_id: ID interpolated into the ``/documents/<id>/thumbnail`` path.
        headers: Request headers; defaults to the module-level ``HEADERS``.
        data: Optional request body, forwarded as-is.

    Returns:
        The ``requests`` response object, unmodified (no status check or decoding).
    """
    thumbnail_url = f"{HOST_ADDRESS}/api/{VERSION}/documents/{document_id}/thumbnail"
    return requests.get(url=thumbnail_url, headers=headers, auth=auth, data=data)


def document_download(auth, attachment_id, *, ext="markdown", headers=HEADERS, data=None):
res = requests.get(
url=f"{HOST_ADDRESS}/api/{VERSION}/documents/{attachment_id}/download",
Expand Down
Loading
Loading