Skip to content
Merged
21 changes: 1 addition & 20 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from api.apps import current_user, login_required
from api.common.check_team_permission import check_kb_team_permission
from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import FileType
from api.db.db_models import Task
from api.db.services import duplicate_name
Expand Down Expand Up @@ -182,25 +182,6 @@ async def create():
return server_error_response(e)


@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required
def thumbnails():
doc_ids = request.args.getlist("doc_ids")
if not doc_ids:
return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)

try:
docs = DocumentService.get_thumbnails(doc_ids)

for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}"

return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
except Exception as e:
return server_error_response(e)


@manager.route("/change_status", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids", "status")
Expand Down
38 changes: 38 additions & 0 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,44 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):
return get_result(data=doc.to_dict())


@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
def list_thumbnails():
"""
Get thumbnails for documents.
---
tags:
- Documents
parameters:
- in: query
name: doc_ids
type: array
required: true
description: List of document IDs to get thumbnails for.
responses:
200:
description: Successfully retrieved thumbnails
400:
description: Missing document IDs
"""
from api.constants import IMG_BASE64_PREFIX
from api.db.services.document_service import DocumentService

doc_ids = request.args.getlist("doc_ids")
if not doc_ids:
return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)

try:
docs = DocumentService.get_thumbnails(doc_ids)

for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}"

return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
Comment thread
coderabbitai[bot] marked this conversation as resolved.
except Exception as e:
return server_error_response(e)
Comment on lines +1171 to +1206
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Inspect the handler as it existed just before this PR's changes on the target branch.
git fetch origin main --depth=1 >/dev/null 2>&1 || true
git show origin/main:api/apps/document_app.py 2>/dev/null \
  | awk '/@manager\.route\("\/thumbnails"/{flag=1} flag{print; if(/^def |^async def /){c++}; if(c>=1 && /^$/){exit}}'

Repository: infiniflow/ragflow

Length of output: 331


Add authentication and authorization to the /thumbnails endpoint.

The list_thumbnails handler is missing @login_required and tenant/KB scoping, unlike all other handlers in this module:

  1. Unauthenticated public access. Any caller can retrieve thumbnail data and leak document kb_id values. While the old handler was also undecorated, this endpoint must enforce authentication.
  2. Cross-tenant enumeration. No permission check scopes DocumentService.get_thumbnails(doc_ids) to the caller's tenant, allowing an authenticated user to enumerate documents across other tenants via direct-object-reference.
  3. Missing logging. No logging for invalid arguments or exceptions, violating the coding guideline to add logging for new flows.

Add @login_required, @add_tenant_id_to_kwargs, and filter results by calling KnowledgebaseService.accessible(kb_id=d["kb_id"], user_id=tenant_id) before returning. Also add logging for the missing doc_ids case and exception handler. Sync the implementation with pattern used in list_docs and other authenticated endpoints.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/apps/restful_apis/document_api.py` around lines 896 - 931, Add
authentication, tenant scoping, and logging to the list_thumbnails handler:
decorate list_thumbnails with `@login_required` and `@add_tenant_id_to_kwargs`,
accept tenant_id from kwargs, log the missing doc_ids case before returning the
ARGUMENT_ERROR, call DocumentService.get_thumbnails(doc_ids) as before but then
filter the returned docs by keeping only those where
KnowledgebaseService.accessible(kb_id=d["kb_id"], user_id=tenant_id) is True
(import KnowledgebaseService), and add logging in the exception handler before
returning server_error_response(e) so failures are recorded; keep the existing
thumbnail prefix logic and produced response shape.



@manager.route("/datasets/<dataset_id>/documents/metadatas", methods=["PATCH"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
Expand Down
11 changes: 11 additions & 0 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,17 @@ def document_change_status(auth, payload=None, *, headers=HEADERS, data=None):
return res.json()


def document_thumbnails(auth, params=None, *, headers=HEADERS, data=None):
"""Get document thumbnails.

Args:
auth: Authentication object
params: Query parameters (e.g., {"doc_ids": ["doc1", "doc2"]})
"""
res = requests.get(url=f"{HOST_ADDRESS}/api/v1/thumbnails", params=params, headers=headers, auth=auth, data=data)
return res.json()


def bulk_upload_documents(auth, kb_id, num, tmp_path):
fps = []
for i in range(num):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
document_infos,
document_metadata_summary,
document_metadata_update,
document_thumbnails,
document_update_metadata_setting,
upload_documents,
)
from utils.file_utils import create_txt_file
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

Expand Down Expand Up @@ -100,6 +103,7 @@ def test_infos(self, WebApiAuth, add_document_func):
assert len(docs) == 1, docs
assert docs[0]["id"] == doc_id, res


## The inputs has been changed to add 'doc_ids'
## TODO:
#@pytest.mark.p2
Expand Down Expand Up @@ -152,6 +156,34 @@ def test_change_status(self, WebApiAuth, add_document_func):
assert info_res["data"]["docs"][0]["status"] == "1", info_res


@pytest.mark.p2
def test_thumbnails_missing_ids(self, WebApiAuth):
"""Test thumbnails endpoint returns error when doc_ids is missing."""
res = document_thumbnails(WebApiAuth, params={})
assert res["code"] == 101, res
assert 'Lack of "Document ID"' in res["message"], res

@pytest.mark.p2
def test_thumbnails_with_doc_ids(self, WebApiAuth, add_dataset_func, tmp_path):
"""Test thumbnails endpoint returns thumbnails for valid doc_ids."""
dataset_id = add_dataset_func

# Create and upload a test document
fp = create_txt_file(tmp_path / "test_thumb.txt")

upload_res = upload_documents(WebApiAuth, {"kb_id": dataset_id}, [fp])
assert upload_res["code"] == 0, upload_res

# Get the document ID
doc_id = upload_res["data"][0]["id"]

# Get thumbnails for the document
res = document_thumbnails(WebApiAuth, params={"doc_ids": [doc_id]})

assert res["code"] == 0, res
assert doc_id in res["data"], res


class TestDocumentMetadataNegative:
@pytest.mark.p2
def test_filter_missing_kb_id(self, WebApiAuth, add_document_func):
Expand Down Expand Up @@ -285,36 +317,6 @@ def test_update_metadata_invalid_delete_item(self, WebApiAuth, add_document_func
assert "Each delete requires key" in res["message"], res


def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch):
module = document_app_module
monkeypatch.setattr(module, "request", _DummyRequest(args={}))
res = module.thumbnails()
assert res["code"] == module.RetCode.ARGUMENT_ERROR
assert 'Lack of "Document ID"' in res["message"]

monkeypatch.setattr(module, "request", _DummyRequest(args={"doc_ids": ["doc1", "doc2"]}))
monkeypatch.setattr(
module.DocumentService,
"get_thumbnails",
lambda _doc_ids: [
{"id": "doc1", "kb_id": "kb1", "thumbnail": "thumb.jpg"},
{"id": "doc2", "kb_id": "kb1", "thumbnail": f"{module.IMG_BASE64_PREFIX}blob"},
],
)
res = module.thumbnails()
assert res["code"] == 0
assert res["data"]["doc1"] == "/v1/document/image/kb1-thumb.jpg"
assert res["data"]["doc2"] == f"{module.IMG_BASE64_PREFIX}blob"

def raise_error(*_args, **_kwargs):
raise RuntimeError("thumb boom")

monkeypatch.setattr(module.DocumentService, "get_thumbnails", raise_error)
monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
res = module.thumbnails()
assert res["code"] == 500
assert "thumb boom" in res["message"]

def test_change_status_partial_failure_matrix_unit(self, document_app_module, monkeypatch):
module = document_app_module
calls = {"docstore_update": []}
Expand Down
2 changes: 1 addition & 1 deletion web/src/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ export default {
documentCreate: `${webAPI}/document/create`,
documentRun: `${webAPI}/document/run`,
documentChangeParser: `${webAPI}/document/change_parser`,
documentThumbnails: `${webAPI}/document/thumbnails`,
documentThumbnails: `${restAPIv1}/thumbnails`,
getDocumentFile: `${webAPI}/document/get`,
getDocumentFileDownload: (docId: string) =>
`${webAPI}/document/download/${docId}`,
Expand Down
Loading