Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 87 additions & 12 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,7 @@ def list_docs(dataset_id, tenant_id):
renamed_doc_list = [map_doc_keys(doc) for doc in payload]
for doc_item in renamed_doc_list:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/api/v1/documents/images/{dataset_id}-{doc_item['thumbnail']}"
doc_item["thumbnail"] = f"/api/v1/documents/{doc_item['id']}/thumbnail"
if doc_item.get("source_type"):
doc_item["source_type"] = doc_item["source_type"].split("/")[0]
if doc_item["parser_config"].get("metadata"):
Expand Down Expand Up @@ -1165,23 +1165,33 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):


@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
@login_required
Copy link
Copy Markdown
Collaborator

@KevinHuSh KevinHuSh May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this change, image loading will fail for all dialogs embedded in an <iframe>. (The dialogs can be embedded in an iframe.)

def list_thumbnails():
"""
Get thumbnails for documents.
Get thumbnails for documents the caller can access.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: query
name: doc_ids
type: array
required: true
description: List of document IDs to get thumbnails for.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
responses:
200:
description: Successfully retrieved thumbnails
description: Successfully retrieved thumbnails. Inaccessible IDs are
silently filtered out so the response cannot be used to
enumerate cross-tenant document IDs.
400:
description: Missing document IDs
description: Missing document IDs.
"""
from api.constants import IMG_BASE64_PREFIX
from api.db.services.document_service import DocumentService
Expand All @@ -1191,11 +1201,15 @@ def list_thumbnails():
return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)

try:
docs = DocumentService.get_thumbnails(doc_ids)
accessible_ids = [doc_id for doc_id in doc_ids if DocumentService.accessible(doc_id, current_user.id)]
if not accessible_ids:
return get_json_result(data={})

docs = DocumentService.get_thumbnails(accessible_ids)

for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/api/v1/documents/images/{doc_item['kb_id']}-{doc_item['thumbnail']}"
doc_item["thumbnail"] = f"/api/v1/documents/{doc_item['id']}/thumbnail"

return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
except Exception as e:
Expand Down Expand Up @@ -1615,20 +1629,30 @@ def _run_sync():
return get_error_data_result(message="Internal server error")


# Chunk image keys are xxhash64 hex digests (16 chars) generated in
# rag/svr/task_executor.py. Restricting the storage key to this shape
# prevents the endpoint from being coerced into serving arbitrary objects
# (e.g. raw documents) that happen to share the bucket.
# The pattern is anchored with \Z rather than $: in Python, $ also matches
# just before a trailing newline, so "<16 hex chars>\n" would otherwise pass
# the shape check and be forwarded to storage as a different key.
_CHUNK_IMAGE_KEY_RE = re.compile(r"^[0-9a-f]{16}\Z")


@manager.route("/documents/images/<image_id>", methods=["GET"]) # noqa: F821
@login_required
async def get_document_image(image_id):
"""
Get a document image by ID.
Get a chunk reference image by ID.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- name: image_id
in: path
required: true
schema:
type: string
description: The image ID (format: bucket-name-image-name)
description: The chunk image ID (format: kb_id-chunk_xxhash)
responses:
200:
description: Image file
Expand All @@ -1639,18 +1663,69 @@ async def get_document_image(image_id):
format: binary
"""
try:
arr = image_id.split("-")
if len(arr) != 2:
parts = image_id.split("-", 1)
if len(parts) != 2:
return get_data_error_result(message="Image not found.")
kb_id, key = parts
if not _CHUNK_IMAGE_KEY_RE.match(key):
logging.warning("get_document_image: invalid key shape image_id=%s user_id=%s", image_id, current_user.id)
return get_data_error_result(message="Image not found.")
bkt, nm = image_id.split("-")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
if not KnowledgebaseService.accessible(kb_id, current_user.id):
logging.warning("get_document_image: access denied image_id=%s user_id=%s", image_id, current_user.id)
return get_data_error_result(message="No authorization.")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, kb_id, key)
response = await make_response(data)
response.headers.set("Content-Type", "image/JPEG")
return response
except Exception as e:
return server_error_response(e)


@manager.route("/documents/<doc_id>/thumbnail", methods=["GET"])  # noqa: F821
@login_required
async def get_document_thumbnail(doc_id):
    """
    Get a document's thumbnail image.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - name: doc_id
        in: path
        required: true
        schema:
          type: string
        description: The document ID.
    responses:
      200:
        description: Thumbnail image
        content:
          image/png:
            schema:
              type: string
              format: binary
    """
    try:
        # The access check runs before the lookup, so a caller without access
        # gets "No authorization." before any existence check is made.
        user_id = current_user.id
        if not DocumentService.accessible(doc_id, user_id):
            logging.warning("get_document_thumbnail: access denied doc_id=%s user_id=%s", doc_id, user_id)
            return get_data_error_result(message="No authorization.")

        found, document = DocumentService.get_by_id(doc_id)
        if not found:
            return get_data_error_result(message="Document not found.")

        # Storage key shape mirrors the producer in api/db/services/file_service.py.
        blob = await thread_pool_exec(
            settings.STORAGE_IMPL.get,
            document.kb_id,
            f"thumbnail_{document.id}.png",
        )
        if not blob:
            return get_data_error_result(message="Thumbnail not found.")

        # Wrap the stored bytes in a response and label them as PNG.
        image_response = await make_response(blob)
        image_response.headers.set("Content-Type", "image/png")
        return image_response
    except Exception as exc:
        return server_error_response(exc)


ARTIFACT_CONTENT_TYPES = {
".png": "image/png",
".jpg": "image/jpeg",
Expand Down
5 changes: 5 additions & 0 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,11 @@ def document_get(auth, document_id, *, headers=HEADERS, data=None):
return res


def document_thumbnail(auth, document_id, *, headers=HEADERS, data=None):
    """Issue a GET request for a document's thumbnail and return the response.

    The request goes to ``/api/{VERSION}/documents/{document_id}/thumbnail``
    under ``HOST_ADDRESS``; ``auth``, ``headers`` and ``data`` are passed
    straight through to ``requests.get``.
    """
    thumbnail_url = f"{HOST_ADDRESS}/api/{VERSION}/documents/{document_id}/thumbnail"
    return requests.get(url=thumbnail_url, headers=headers, auth=auth, data=data)


def document_download(auth, attachment_id, *, ext="markdown", headers=HEADERS, data=None):
res = requests.get(
url=f"{HOST_ADDRESS}/api/{VERSION}/documents/{attachment_id}/download",
Expand Down
Loading