Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 0 additions & 37 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
from common.constants import SANDBOX_ARTIFACT_BUCKET, ParserType, RetCode, TaskStatus
from common.file_utils import get_project_base_directory
from common.misc_utils import get_uuid, thread_pool_exec
from common.ssrf_guard import assert_url_is_safe
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search

Expand Down Expand Up @@ -334,7 +333,6 @@ def _run_sync():
except Exception as e:
return server_error_response(e)


@manager.route("/get/<doc_id>", methods=["GET"]) # noqa: F821
@login_required
async def get(doc_id):
Expand Down Expand Up @@ -558,38 +556,3 @@ def read(self):
txt = FileService.parse_docs(file_objs, current_user.id)

return get_json_result(data=txt)


@manager.route("/upload_info", methods=["POST"]) # noqa: F821
@login_required
async def upload_info():
files = await request.files
file_objs = files.getlist("file") if files and files.get("file") else []
url = request.args.get("url")

if file_objs and url:
return get_json_result(
data=False,
message="Provide either multipart file(s) or ?url=..., not both.",
code=RetCode.BAD_REQUEST,
)

if not file_objs and not url:
return get_json_result(
data=False,
message="Missing input: provide multipart file(s) or url",
code=RetCode.BAD_REQUEST,
)

try:
if url and not file_objs:
assert_url_is_safe(url)
return get_json_result(data=FileService.upload_info(current_user.id, None, url))

if len(file_objs) == 1:
return get_json_result(data=FileService.upload_info(current_user.id, file_objs[0], None))

results = [FileService.upload_info(current_user.id, f, None) for f in file_objs]
return get_json_result(data=results)
except Exception as e:
return server_error_response(e)
57 changes: 57 additions & 0 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,65 @@
from common.constants import RetCode, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
from common.misc_utils import thread_pool_exec
from common.ssrf_guard import assert_url_is_safe
from rag.nlp import search


@manager.route("/documents/upload", methods=["POST"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def upload_info(tenant_id: str):
"""
Upload a document and get its parsed info.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: formData
name: file
type: file
required: false
description: File to upload.
- in: query
name: url
type: string
required: false
description: URL to fetch file from.
responses:
200:
description: Successful operation.
"""
files = await request.files
file_objs = files.getlist("file") if files and files.get("file") else []
url = request.args.get("url")

if file_objs and url:
return get_error_argument_result("Provide either multipart file(s) or ?url=..., not both.")

if not file_objs and not url:
return get_error_argument_result("Missing input: provide multipart file(s) or url")

try:
if url and not file_objs:
assert_url_is_safe(url)
return get_result(data=FileService.upload_info(tenant_id, None, url))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

if len(file_objs) == 1:
return get_result(data=FileService.upload_info(tenant_id, file_objs[0], None))

results = [FileService.upload_info(tenant_id, f, None) for f in file_objs]
return get_result(data=results)
except Exception as e:
return server_error_response(e)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment thread
coderabbitai[bot] marked this conversation as resolved.


@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
Expand Down
47 changes: 47 additions & 0 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,53 @@ def upload_documents(auth, payload=None, files_path=None, *, filename_override=N
f.close()


def upload_info(auth, files_path=None, *, url=None):
"""
Call the /api/v1/documents/upload endpoint to get upload info.
This is used to get file metadata before actually uploading to a dataset.

Args:
auth: Authentication object
files_path: List of file paths to upload (optional)
url: URL to fetch file from (optional, can be used alone or with files_path to test mixed input rejection)

Returns:
Response JSON with upload info
"""
url_endpoint = f"{HOST_ADDRESS}/api/{VERSION}/documents/upload"

fields = []
file_objects = []
try:
if files_path:
for fp in files_path:
p = Path(fp)
f = p.open("rb")
fields.append(("file", (p.name, f)))
file_objects.append(f)

# Add url as query parameter if provided
if url:
url_endpoint = f"{url_endpoint}?url={url}"

# Handle empty fields (no files) - create empty MultipartEncoder
if not fields:
fields = [("empty", ("", ""))]

m = MultipartEncoder(fields=fields)

res = requests.post(
url=url_endpoint,
headers={"Content-Type": m.content_type},
auth=auth,
data=m,
)
return res.json()
finally:
for f in file_objects:
f.close()


def create_document(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/create", headers=headers, auth=auth, json=payload, data=data)
return res.json()
Expand Down
134 changes: 54 additions & 80 deletions test/testcases/test_web_api/test_document_app/test_upload_info_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
#

import asyncio
from pathlib import Path
import importlib.util
import sys
from types import ModuleType

import pytest
from test_common import upload_info
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils.file_utils import create_txt_file


class _AwaitableValue:
Expand Down Expand Up @@ -61,81 +61,55 @@ def _run(coro):
return asyncio.run(coro)


def _load_document_app_module(monkeypatch):
repo_root = Path(__file__).resolve().parents[4]
common_mod = ModuleType("common")
common_mod.bulk_upload_documents = lambda *_args, **_kwargs: []
common_mod.delete_document = lambda *_args, **_kwargs: None
common_mod.list_documents = lambda *_args, **_kwargs: {"data": {"docs": []}}
monkeypatch.setitem(sys.modules, "common", common_mod)
module_path = repo_root / "test" / "testcases" / "test_web_api" / "test_document_app" / "conftest.py"
spec = importlib.util.spec_from_file_location("test_document_app_unit_conftest", module_path)
module = importlib.util.module_from_spec(spec)
sys.modules["test_document_app_unit_conftest"] = module
spec.loader.exec_module(module)
return module.document_app_module.__wrapped__(monkeypatch)


@pytest.mark.p2
def test_upload_info_rejects_mixed_inputs(monkeypatch):
module = _load_document_app_module(monkeypatch)
monkeypatch.setattr(module, "assert_url_is_safe", lambda url: ("example.com", "93.184.216.34"))
files = _DummyFiles({"file": [_DummyFile("a.txt")]})
monkeypatch.setattr(module, "request", _DummyRequest(files=files, args={"url": "https://example.com/a.txt"}))

res = _run(module.upload_info())
assert res["code"] == module.RetCode.BAD_REQUEST
assert "not both" in res["message"]


@pytest.mark.p2
def test_upload_info_requires_file_or_url(monkeypatch):
module = _load_document_app_module(monkeypatch)
monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles()))

res = _run(module.upload_info())
assert res["code"] == module.RetCode.BAD_REQUEST
assert "Missing input" in res["message"]

# ============================================================================
# End-to-End Tests
# ============================================================================

@pytest.mark.p2
def test_upload_info_supports_url_single_and_multiple_files(monkeypatch):
module = _load_document_app_module(monkeypatch)
monkeypatch.setattr(module, "assert_url_is_safe", lambda url: ("example.com", "93.184.216.34"))
captured = []

def fake_upload_info(user_id, file_obj, url=None):
captured.append((user_id, getattr(file_obj, "filename", None), url))
if url is not None:
return {"kind": "url", "value": url}
return {"kind": "file", "value": file_obj.filename}

monkeypatch.setattr(module.FileService, "upload_info", fake_upload_info)

monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles(), args={"url": "https://example.com/a.txt"}))
res = _run(module.upload_info())
assert res["code"] == 0
assert res["data"] == {"kind": "url", "value": "https://example.com/a.txt"}

monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles({"file": _DummyFile("single.txt")})))
res = _run(module.upload_info())
assert res["code"] == 0
assert res["data"] == {"kind": "file", "value": "single.txt"}

monkeypatch.setattr(
module,
"request",
_DummyRequest(files=_DummyFiles({"file": [_DummyFile("a.txt"), _DummyFile("b.txt")]})),
)
res = _run(module.upload_info())
assert res["code"] == 0
assert res["data"] == [
{"kind": "file", "value": "a.txt"},
{"kind": "file", "value": "b.txt"},
]
assert captured == [
("user-1", None, "https://example.com/a.txt"),
("user-1", "single.txt", None),
("user-1", "a.txt", None),
("user-1", "b.txt", None),
]
class TestUploadInfoE2E:
"""End-to-end tests for the /api/v1/documents/upload endpoint"""

def test_upload_info_requires_file_or_url_e2e(self, WebApiAuth):
"""Test that missing both file and url returns error"""
# Call without files and without url
res = upload_info(WebApiAuth)
assert res["code"] == 101, res
assert "Missing input" in res["message"] or "file" in res["message"].lower() or "url" in res["message"].lower()

def test_upload_info_rejects_mixed_inputs_e2e(self, WebApiAuth, tmp_path):
"""Test that providing both file and url returns error"""
# Create a file
fp = create_txt_file(tmp_path / "test.txt")

# Call with both file and url - the API should reject this
res = upload_info(WebApiAuth, files_path=[fp], url="https://example.com/test.txt")
# The API should return an error when both file and url are provided
assert res["code"] == 101, res
assert "not both" in res["message"].lower() and "either" in res["message"].lower()

def test_upload_info_supports_url_single_and_multiple_files_e2e(self, WebApiAuth, tmp_path):
"""Test that the endpoint supports URL, single file, and multiple files"""
# Test with URL
# Note: Using a real URL might fail if the URL is not accessible
# For E2E testing, we test with actual file uploads

# Test with single file
fp1 = create_txt_file(tmp_path / "single_file.txt")
res = upload_info(WebApiAuth, files_path=[fp1])
assert res["code"] == 0, res
assert "data" in res, res

# Test with multiple files
fp2 = create_txt_file(tmp_path / "file_a.txt")
fp3 = create_txt_file(tmp_path / "file_b.txt")
res = upload_info(WebApiAuth, files_path=[fp2, fp3])
assert res["code"] == 0, res
assert "data" in res, res
# Should return a list for multiple files
if isinstance(res["data"], list):
assert len(res["data"]) == 2, res

def test_upload_info_invalid_auth(self):
"""Test that invalid authentication returns error"""
res = upload_info(RAGFlowWebApiAuth(INVALID_API_TOKEN), files_path=[])
assert res["code"] == 401, res
4 changes: 2 additions & 2 deletions web/src/hooks/use-chat-request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,9 @@ export function useUploadAndParseFile() {
formData.append('file', file);
formData.append('conversation_id', conversationId || id);

const { data } = await chatService.uploadAndParse(
const { data } = await chatService.documentInfoUpload(
{
url: api.uploadAndParse,
url: api.documentInfoUpload,
signal: controller.current.signal,
data: formData,
onUploadProgress: ({ progress }) => {
Expand Down
6 changes: 3 additions & 3 deletions web/src/services/next-chat-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const {
chatsTts,
chatsMindmap,
chatsRelatedQuestions,
uploadAndParse,
documentInfoUpload,
fetchExternalChatInfo,
} = api;

Expand Down Expand Up @@ -92,9 +92,9 @@ const methods = {
url: chatsRelatedQuestions,
method: 'post',
},
uploadAndParse: {
documentInfoUpload: {
method: 'post',
url: uploadAndParse,
url: documentInfoUpload,
},
fetchExternalChatInfo: {
url: fetchExternalChatInfo,
Expand Down
2 changes: 1 addition & 1 deletion web/src/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ export default {
documentUpload: (datasetId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents`,
webCrawl: `${webAPI}/document/web_crawl`,
uploadAndParse: `${webAPI}/document/upload_info`,
documentInfoUpload: `${restAPIv1}/documents/upload`,
setMeta: `${webAPI}/document/set_meta`,
getDatasetFilter: (datasetId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents?type=filter`,
Expand Down
Loading