Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 0 additions & 70 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,76 +201,6 @@ def thumbnails():
return server_error_response(e)


def _apply_document_status(doc_id, status):
    """Switch one document to ``status`` and return its result entry.

    Args:
        doc_id: ID of the document to update.
        status: Target status, already validated as ``"0"`` or ``"1"``.

    Returns:
        ``{"status": status}`` on success (including when the document
        already has that status), otherwise ``{"error": <message>}``.
    """
    # The access check sits outside the try block on purpose: an exception
    # raised here propagates instead of being reported per document.
    if not DocumentService.accessible(doc_id, current_user.id):
        return {"error": "No authorization."}

    try:
        found, doc = DocumentService.get_by_id(doc_id)
        if not found:
            return {"error": "No authorization."}

        found, kb = KnowledgebaseService.get_by_id(doc.kb_id)
        if not found:
            return {"error": "Can't find this dataset!"}

        # Nothing to write when the document is already in the target state.
        if str(doc.status) == status:
            return {"status": status}

        if not DocumentService.update_by_id(doc_id, {"status": status}):
            return {"error": "Database error (Document update)!"}

        # Only documents that have chunks are mirrored into the doc store.
        if getattr(doc, "chunk_num", 0) > 0:
            try:
                updated = settings.docStoreConn.update(
                    {"doc_id": doc_id},
                    {"available_int": int(status)},
                    search.index_name(kb.tenant_id),
                    doc.kb_id,
                )
            except Exception as exc:
                detail = str(exc)
                if "3022" in detail:
                    return {"error": "Document store table missing."}
                return {"error": f"Document store update failed: {detail}"}
            if not updated:
                return {"error": "Database error (docStore update)!"}

        return {"status": status}
    except Exception as err:
        return {"error": f"Internal server error: {str(err)}"}


@manager.route("/change_status", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_ids", "status")
async def change_status():
    """Set the status of several documents in one request.

    Reads ``doc_ids`` and ``status`` from the JSON body. ``status`` must be
    ``0`` or ``1``. Each document is handled independently; the response maps
    every document ID to either ``{"status": ...}`` or ``{"error": ...}``.
    If any document failed, the response carries the message
    "Partial failure" with ``RetCode.SERVER_ERROR``.
    """
    payload = await get_request_json()
    requested_ids = payload.get("doc_ids", [])
    target = str(payload.get("status", ""))

    if target not in ("0", "1"):
        return get_json_result(data=False, message='"Status" must be either 0 or 1!', code=RetCode.ARGUMENT_ERROR)

    outcome = {}
    any_failed = False
    for doc_id in requested_ids:
        entry = _apply_document_status(doc_id, target)
        outcome[doc_id] = entry
        if "error" in entry:
            any_failed = True

    if not any_failed:
        return get_json_result(data=outcome)
    return get_json_result(data=outcome, message="Partial failure", code=RetCode.SERVER_ERROR)


@manager.route("/run", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids", "run")
Expand Down
119 changes: 119 additions & 0 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,3 +1019,122 @@ async def update_metadata(tenant_id, dataset_id):
target_doc_ids = list(target_doc_ids)
updated = DocMetadataService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})


@manager.route("/datasets/<dataset_id>/documents/batch-update-status", methods=["POST"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def batch_update_document_status(tenant_id, dataset_id):
    """
    Batch update status of documents within a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document status update parameters.
        required: true
        schema:
          type: object
          required:
            - doc_ids
            - status
          properties:
            doc_ids:
              type: array
              items:
                type: string
              description: List of document IDs to update.
            status:
              type: string
              enum: ["0", "1"]
              description: New status (0 = disabled, 1 = enabled).
    responses:
      200:
        description: Document statuses updated successfully.
    """
    from common import settings
    from rag.nlp import search

    req = await get_request_json()
    doc_ids = req.get("doc_ids", [])
    # A missing status becomes "-1", which fails the validation below.
    status = str(req.get("status", -1))

    if status not in ["0", "1"]:
        return get_error_argument_result(message=f'"Status" must be either 0 or 1:{status}!')

    # Verify dataset ownership
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message="You don't own the dataset.")

    e, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not e:
        return get_error_data_result(message="Can't find this dataset!")

    # Each document is processed independently; `result` maps doc_id to
    # either {"status": ...} or {"error": ...}.
    result = {}
    has_error = False
    for doc_id in doc_ids:
        try:
            e, doc = DocumentService.get_by_id(doc_id)
            if not e:
                result[doc_id] = {"error": "Document not found"}
                has_error = True
                continue

            # Reject documents that belong to a different dataset than the
            # one named in the URL.
            if doc.kb_id != dataset_id:
                logging.warning(
                    "Document %s belongs to dataset %s, not to requested dataset %s",
                    doc_id,
                    doc.kb_id,
                    dataset_id,
                )
                result[doc_id] = {"error": "Document not found in this dataset."}
                has_error = True
                continue

            # Already in the requested state: report success without writing.
            current_status = str(doc.status)
            if current_status == status:
                result[doc_id] = {"status": status}
                continue
            if not DocumentService.update_by_id(doc_id, {"status": str(status)}):
                result[doc_id] = {"error": "Database error (Document update)!"}
                has_error = True
                continue

            status_int = int(status)
            # Only documents that have chunks are mirrored into the doc store.
            if getattr(doc, "chunk_num", 0) > 0:
                try:
                    ok = settings.docStoreConn.update(
                        {"doc_id": doc_id},
                        {"available_int": status_int},
                        search.index_name(kb.tenant_id),
                        doc.kb_id,
                    )
                except Exception as exc:
                    msg = str(exc)
                    # NOTE(review): "3022" is reported as a missing table;
                    # presumably a doc-store error code - confirm.
                    if "3022" in msg:
                        result[doc_id] = {"error": "Document store table missing."}
                    else:
                        result[doc_id] = {"error": f"Document store update failed: {msg}"}
                    has_error = True
                    continue
                if not ok:
                    result[doc_id] = {"error": "Database error (docStore update)!"}
                    has_error = True
                    continue
            result[doc_id] = {"status": status}
        except Exception as e:
            result[doc_id] = {"error": f"Internal server error: {str(e)}"}
            has_error = True

    # NOTE(review): any per-document failure, including client-side ones such
    # as an unknown document ID, is reported with RetCode.SERVER_ERROR. The
    # code is kept as-is so existing callers are not broken.
    if has_error:
        return get_json_result(data=result, message="Partial failure", code=RetCode.SERVER_ERROR)
    return get_json_result(data=result)
Comment on lines +1138 to +1140
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Partial-failure response should not use SERVER_ERROR.

When has_error is due solely to client-side/per-document issues (e.g., "Document not found" or "Document not found in this dataset."), returning RetCode.SERVER_ERROR misrepresents the failure class and will trigger 5xx error handling / alerts on the client. Consider returning a client-side code (e.g., RetCode.ARGUMENT_ERROR or RetCode.DATA_ERROR), or distinguishing between "all failed", "partial failure", and "all succeeded" so callers can handle each.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/apps/restful_apis/document_api.py` around lines 1138 - 1140, The
partial-failure branch currently returns a server-side code
(RetCode.SERVER_ERROR) which misclassifies client/document errors; update the
logic in the block that checks has_error (where get_json_result is called) to
return an appropriate client-side code such as RetCode.ARGUMENT_ERROR or
RetCode.DATA_ERROR instead, or add finer distinctions (e.g., separate conditions
for "all failed" vs "partial failure" vs "all succeeded") so
get_json_result(data=result, message="Partial failure", code=...) uses a non-5xx
RetCode and callers can handle partial vs total failures correctly.

12 changes: 10 additions & 2 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,8 +423,16 @@ def document_update_metadata_setting(auth, dataset_id, doc_id, payload=None, *,
return res.json()


def document_change_status(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/change_status", headers=headers, auth=auth, json=payload, data=data)
def document_change_status(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
    """
    Post a batch status update for documents in one dataset.

    Args:
        auth: Authentication credentials
        dataset_id: ID of the dataset
        payload: Request body containing doc_ids and status, sent as JSON
        headers: HTTP headers for the request
        data: Raw request body passed through to requests.post

    Returns:
        The response body decoded from JSON.
    """
    endpoint = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents/batch-update-status"
    response = requests.post(url=endpoint, headers=headers, auth=auth, json=payload, data=data)
    return response.json()


Expand Down
Loading
Loading