diff --git a/example/http/chat_assistant_example.sh b/example/http/chat_assistant_example.sh
new file mode 100644
index 00000000000..bcac93fadd2
--- /dev/null
+++ b/example/http/chat_assistant_example.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Walks through the chat assistant HTTP endpoints with curl: create an
+# assistant, open a session, ask one non-streaming and one streaming question,
+# list the sessions, then delete the session and the assistant.
+#
+# Environment:
+#   RAGFLOW_HOST_ADDRESS  base URL of the server (default: http://localhost:9380)
+#   RAGFLOW_API_KEY       API key sent as a Bearer token (default: a placeholder)
+# Requires: curl, jq
+
+# Variables
+HOST_ADDRESS="${RAGFLOW_HOST_ADDRESS:-http://localhost:9380}"
+API_KEY="${RAGFLOW_API_KEY:-ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm}"
+
+# Check for jq
+if ! command -v jq &> /dev/null; then
+  echo "jq could not be found, please install it to run this example." >&2
+  exit 1
+fi
+
+# Deletes the assistant created in step 1. Shared by the failure path and the
+# final cleanup so a failed run does not leave the assistant behind.
+delete_chat() {
+  curl -s --request DELETE \
+    --url "${HOST_ADDRESS}/api/v1/chats" \
+    --header 'Content-Type: application/json' \
+    --header "Authorization: Bearer ${API_KEY}" \
+    --data "{\"ids\": [\"${CHAT_ID}\"]}" | jq .
+}
+
+# 1. Create a chat assistant
+echo -e "\n-- Create a chat assistant"
+CHAT_RESPONSE=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/chats" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{
+    "name": "My Assistant",
+    "llm_id": "deepseek-chat"
+  }')
+# Quote the response so jq receives the JSON unmodified; "// empty" turns a
+# missing id into an empty string instead of the literal "null".
+CHAT_ID=$(printf '%s' "${CHAT_RESPONSE}" | jq -r '.data.id // empty')
+if [[ -z "${CHAT_ID}" ]]; then
+  echo "Failed to create chat assistant: ${CHAT_RESPONSE}" >&2
+  exit 1
+fi
+echo "Chat Assistant ID: ${CHAT_ID}"
+
+# 2. Create a session for the assistant
+echo -e "\n-- Create a session"
+SESSION_RESPONSE=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/chats/${CHAT_ID}/sessions" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{
+    "name": "New Session"
+  }')
+SESSION_ID=$(printf '%s' "${SESSION_RESPONSE}" | jq -r '.data.id // empty')
+if [[ -z "${SESSION_ID}" ]]; then
+  echo "Failed to create session: ${SESSION_RESPONSE}" >&2
+  echo -e "\n-- Deleting chat assistant"
+  delete_chat
+  exit 1
+fi
+echo "Session ID: ${SESSION_ID}"
+
+# 3. Ask a question (Non-streaming)
+echo -e "\n-- Ask a question (Non-streaming)"
+curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/chats/${CHAT_ID}/completions" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"question\": \"What is RAGFlow?\",
+    \"stream\": false,
+    \"session_id\": \"${SESSION_ID}\"
+  }" | jq .
+
+# 4. Ask a question (Streaming)
+echo -e "\n-- Ask a question (Streaming)"
+# Note: Streaming output will be raw SSE data
+curl -N -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/chats/${CHAT_ID}/completions" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"question\": \"Tell me more.\",
+    \"stream\": true,
+    \"session_id\": \"${SESSION_ID}\"
+  }"
+
+# 5. List sessions
+echo -e "\n-- List sessions"
+curl -s --request GET \
+  --url "${HOST_ADDRESS}/api/v1/chats/${CHAT_ID}/sessions" \
+  --header "Authorization: Bearer ${API_KEY}" | jq .
+
+# 6. Delete sessions
+echo -e "\n-- Delete sessions"
+curl -s --request DELETE \
+  --url "${HOST_ADDRESS}/api/v1/chats/${CHAT_ID}/sessions" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"ids\": [\"${SESSION_ID}\"]
+  }" | jq .
+
+# Cleanup
+echo -e "\n-- Deleting chat assistant"
+delete_chat
diff --git a/example/http/chunk_example.sh b/example/http/chunk_example.sh
new file mode 100644
index 00000000000..98bbde81f39
--- /dev/null
+++ b/example/http/chunk_example.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Walks through the chunk HTTP endpoints with curl: create a dataset, upload
+# a document, then add, list, update and delete a chunk before removing the
+# dataset again.
+#
+# Environment:
+#   RAGFLOW_HOST_ADDRESS  base URL of the server (default: http://localhost:9380)
+#   RAGFLOW_API_KEY       API key sent as a Bearer token (default: a placeholder)
+# Requires: curl, jq. Uploads ./sample.txt when it exists; otherwise a small
+# temporary text file is generated and uploaded under that name.
+
+# Variables
+HOST_ADDRESS="${RAGFLOW_HOST_ADDRESS:-http://localhost:9380}"
+API_KEY="${RAGFLOW_API_KEY:-ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm}"
+
+# Check for jq
+if ! command -v jq &> /dev/null; then
+  echo "jq could not be found, please install it to run this example." >&2
+  exit 1
+fi
+
+# Deletes the example dataset. Shared by the failure paths and the final
+# cleanup so a failed run does not leave the dataset behind.
+delete_dataset() {
+  curl -s --request DELETE \
+    --url "${HOST_ADDRESS}/api/v1/datasets" \
+    --header 'Content-Type: application/json' \
+    --header "Authorization: Bearer ${API_KEY}" \
+    --data "{\"ids\": [\"${DATASET_ID}\"]}" | jq .
+}
+
+# Prints the message in $1 to stderr, removes the dataset and stops the script.
+fail() {
+  echo "$1" >&2
+  echo -e "\n-- Cleaning up dataset"
+  delete_dataset
+  exit 1
+}
+
+# Pick the file to upload: the caller's sample.txt if present, else a
+# generated temporary file that is removed when the script exits.
+SAMPLE_FILE="sample.txt"
+if [[ ! -f "${SAMPLE_FILE}" ]]; then
+  SAMPLE_FILE=$(mktemp) || { echo "Could not create a temporary sample file." >&2; exit 1; }
+  trap 'rm -f -- "${SAMPLE_FILE}"' EXIT
+  printf '%s\n' "RAGFlow is an open-source RAG engine based on deep document understanding." > "${SAMPLE_FILE}"
+fi
+
+# 0. Setup: Create a dataset and upload a document to get IDs
+echo -e "\n-- Creating a dataset"
+DATASET_ID=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/datasets" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{"name": "chunk_shell_example"}' | jq -r '.data.id // empty')
+if [[ -z "${DATASET_ID}" ]]; then
+  echo "Failed to create dataset; check the server address and API key." >&2
+  exit 1
+fi
+echo "Dataset ID: ${DATASET_ID}"
+
+echo -e "\n-- Uploading a document"
+# filename= keeps the uploaded name stable even when a temporary file is sent.
+DOC_ID=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/datasets/${DATASET_ID}/documents" \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --form "file=@${SAMPLE_FILE};type=text/plain;filename=sample.txt" \
+  --form 'display_name=sample.txt' | jq -r '.data[0].id // empty')
+[[ -n "${DOC_ID}" ]] || fail "Failed to upload the document."
+echo "Document ID: ${DOC_ID}"
+
+# 1. Add a chunk to a document
+echo -e "\n-- Add a chunk to a document"
+CHUNK_ID=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/datasets/${DATASET_ID}/documents/${DOC_ID}/chunks" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{
+    "content": "RAGFlow is an open-source RAG engine.",
+    "important_keywords": ["RAGFlow", "open-source"]
+  }' | jq -r '.data.chunk.id // empty')
+[[ -n "${CHUNK_ID}" ]] || fail "Failed to add a chunk."
+echo "Chunk ID: ${CHUNK_ID}"
+
+# 2. List chunks of a document
+echo -e "\n-- List chunks of a document"
+curl -s --request GET \
+  --url "${HOST_ADDRESS}/api/v1/datasets/${DATASET_ID}/documents/${DOC_ID}/chunks?page=1&page_size=10" \
+  --header "Authorization: Bearer ${API_KEY}" | jq .
+
+# 3. Update a chunk
+echo -e "\n-- Update a chunk"
+curl -s --request PUT \
+  --url "${HOST_ADDRESS}/api/v1/datasets/${DATASET_ID}/documents/${DOC_ID}/chunks/${CHUNK_ID}" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{
+    "content": "RAGFlow is a powerful open-source RAG engine."
+  }' | jq .
+
+# 4. Delete chunks
+echo -e "\n-- Delete chunks"
+curl -s --request DELETE \
+  --url "${HOST_ADDRESS}/api/v1/datasets/${DATASET_ID}/documents/${DOC_ID}/chunks" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"chunk_ids\": [\"${CHUNK_ID}\"]
+  }" | jq .
+
+# Cleanup
+echo -e "\n-- Cleaning up dataset"
+delete_dataset
diff --git a/example/http/retrieval_example.sh b/example/http/retrieval_example.sh
new file mode 100644
index 00000000000..e8ad435dd89
--- /dev/null
+++ b/example/http/retrieval_example.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Walks through the retrieval HTTP endpoint with curl: create a dataset, send
+# one retrieval request with explicit ranking parameters and one with keyword
+# search enabled, then remove the dataset.
+#
+# Environment:
+#   RAGFLOW_HOST_ADDRESS  base URL of the server (default: http://localhost:9380)
+#   RAGFLOW_API_KEY       API key sent as a Bearer token (default: a placeholder)
+# Requires: curl, jq
+
+# Variables
+HOST_ADDRESS="${RAGFLOW_HOST_ADDRESS:-http://localhost:9380}"
+API_KEY="${RAGFLOW_API_KEY:-ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm}"
+
+# Check for jq
+if ! command -v jq &> /dev/null; then
+  echo "jq could not be found, please install it to run this example." >&2
+  exit 1
+fi
+
+# 0. Setup: Create a dataset to retrieve from
+echo -e "\n-- Creating a dataset"
+DATASET_ID=$(curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/datasets" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data '{"name": "retrieval_shell_example"}' | jq -r '.data.id // empty')
+if [[ -z "${DATASET_ID}" ]]; then
+  echo "Failed to create dataset; check the server address and API key." >&2
+  exit 1
+fi
+echo "Dataset ID: ${DATASET_ID}"
+
+# 1. Perform semantic retrieval from a dataset
+echo -e "\n-- Perform semantic retrieval"
+curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/retrieval" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"dataset_ids\": [\"${DATASET_ID}\"],
+    \"question\": \"What is RAGFlow?\",
+    \"page\": 1,
+    \"page_size\": 5,
+    \"similarity_threshold\": 0.2,
+    \"vector_similarity_weight\": 0.3,
+    \"top_k\": 1024
+  }" | jq .
+
+# 2. Perform retrieval with keyword search enabled
+echo -e "\n-- Perform retrieval with keyword search"
+curl -s --request POST \
+  --url "${HOST_ADDRESS}/api/v1/retrieval" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{
+    \"dataset_ids\": [\"${DATASET_ID}\"],
+    \"question\": \"workflow features\",
+    \"keyword\": true,
+    \"top_k\": 10
+  }" | jq .
+
+# Cleanup
+echo -e "\n-- Cleaning up dataset"
+curl -s --request DELETE \
+  --url "${HOST_ADDRESS}/api/v1/datasets" \
+  --header 'Content-Type: application/json' \
+  --header "Authorization: Bearer ${API_KEY}" \
+  --data "{\"ids\": [\"${DATASET_ID}\"]}" | jq .
diff --git a/example/sdk/chat_assistant_example.py b/example/sdk/chat_assistant_example.py
new file mode 100644
index 00000000000..6c2e38f5347
--- /dev/null
+++ b/example/sdk/chat_assistant_example.py
@@ -0,0 +1,119 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""
+The example demonstrates how to create a chat assistant, manage sessions,
+and perform both standard and streaming chat.
+
+The server address and API key are read from the RAGFLOW_HOST_ADDRESS and
+RAGFLOW_API_KEY environment variables. Everything the example creates is
+deleted again before it exits, whether or not an earlier step failed.
+"""
+
+from ragflow_sdk import RAGFlow
+import sys
+import os
+
+HOST_ADDRESS = os.environ.get("RAGFLOW_HOST_ADDRESS", "http://127.0.0.1")
+API_KEY = os.environ.get("RAGFLOW_API_KEY", "ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm")
+
+# Handles to the resources created below. They start as None so the cleanup in
+# the finally block can tell which ones actually exist after a failure.
+rag = None
+dataset = None
+assistant = None
+session = None
+exit_code = 0
+
+try:
+    rag = RAGFlow(api_key=API_KEY, base_url=HOST_ADDRESS)
+
+    # 1. Create a dataset to be used by the assistant
+    print("Creating dataset...")
+    dataset = rag.create_dataset(name="assistant_example_dataset")
+
+    # 2. Create a chat assistant
+    print("Creating chat assistant...")
+    assistant = rag.create_chat(
+        name="Test Assistant",
+        dataset_ids=[dataset.id],
+        llm_id="deepseek-chat",  # Example LLM ID, replace with your actual model ID
+        prompt_config={"system": "You are a helpful assistant."}
+    )
+    print(f"Assistant created: {assistant.name} (ID: {assistant.id})")
+
+    # 3. Create a session
+    print("Creating a new session...")
+    session = assistant.create_session(name="Example Session")
+    print(f"Session created: {session.name} (ID: {session.id})")
+
+    # 4. Standard chat (non-streaming)
+    print("\n--- Standard Chat ---")
+    question = "What is RAGFlow?"
+    print(f"User: {question}")
+
+    # ask returns a generator of Message objects
+    # for stream=False, it yields once with the full answer
+    for message in session.ask(question=question, stream=False):
+        print(f"Assistant: {message.content}")
+        if hasattr(message, 'reference') and message.reference:
+            print(f"References used: {len(message.reference)} chunks")
+
+    # 5. Streaming chat
+    print("\n--- Streaming Chat ---")
+    question = "Tell me more about its features."
+    print(f"User: {question}")
+    print("Assistant: ", end="", flush=True)
+
+    for message in session.ask(question=question, stream=True):
+        # In streaming mode, each message.content usually contains the incremental part
+        # or the full content so far depending on the SDK implementation.
+        # Based on RAGFlow SDK, it typically yields incremental parts.
+        print(message.content, end="", flush=True)
+    print("\n")
+
+    # 6. List sessions
+    print("Listing sessions for this assistant...")
+    sessions = assistant.list_sessions(page=1, page_size=10)
+    for s in sessions:
+        print(f"- {s.name} (ID: {s.id})")
+
+except Exception as e:
+    print(f"An error occurred: {e}")
+    exit_code = -1
+
+finally:
+    # Cleanup runs on success and on failure. Each delete is attempted on its
+    # own so that one failing call does not skip the remaining ones.
+    cleanup_steps = []
+    if assistant is not None and session is not None:
+        cleanup_steps.append(lambda: assistant.delete_sessions(ids=[session.id]))
+    if assistant is not None:
+        cleanup_steps.append(lambda: rag.delete_chats(ids=[assistant.id]))
+    if dataset is not None:
+        cleanup_steps.append(lambda: rag.delete_datasets(ids=[dataset.id]))
+    if cleanup_steps:
+        print("\nCleaning up...")
+    for step in cleanup_steps:
+        try:
+            step()
+        except Exception as e:
+            print(f"An error occurred during cleanup: {e}")
+            exit_code = -1
+
+if exit_code == 0:
+    print("Chat assistant example done.")
+sys.exit(exit_code)
diff --git a/example/sdk/chunk_example.py b/example/sdk/chunk_example.py
new file mode 100644
index 00000000000..aed2d9b2358
--- /dev/null
+++ b/example/sdk/chunk_example.py
@@ -0,0 +1,113 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""
+The example demonstrates chunk management (Add, List, Update, Delete)
+within a RAGFlow dataset using the Python SDK.
+
+The server address and API key are read from the RAGFLOW_HOST_ADDRESS and
+RAGFLOW_API_KEY environment variables. The dataset the example creates is
+deleted again before it exits, whether or not an earlier step failed.
+"""
+
+from ragflow_sdk import RAGFlow
+import sys
+import time
+import os
+
+HOST_ADDRESS = os.environ.get("RAGFLOW_HOST_ADDRESS", "http://127.0.0.1")
+API_KEY = os.environ.get("RAGFLOW_API_KEY", "ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm")
+
+# Handles to the resources created below. They start as None so the cleanup in
+# the finally block can tell which ones actually exist after a failure.
+rag = None
+dataset = None
+exit_code = 0
+
+try:
+    rag = RAGFlow(api_key=API_KEY, base_url=HOST_ADDRESS)
+
+    # 1. Create a dataset
+    print("Creating dataset...")
+    dataset = rag.create_dataset(name="chunk_example_dataset")
+
+    # 2. Upload a document
+    print("Uploading document...")
+    # Using a simple text content for example
+    content = "RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding."
+    docs = dataset.upload_documents([{"display_name": "sample.txt", "blob": content.encode('utf-8')}])
+    doc = docs[0]
+
+    # 3. Parse the document (required before manual chunk operations if you want it to be processed)
+    print("Parsing document...")
+    dataset.async_parse_documents([doc.id])
+
+    # Wait for parsing to complete with timeout
+    MAX_WAIT = 120  # seconds
+    elapsed = 0
+    while elapsed < MAX_WAIT:
+        doc_status = dataset.list_documents(id=doc.id)[0]
+        # NOTE(review): confirm which value the SDK reports in `run` for a
+        # finished document; the loop only stops early when it equals "1".
+        if doc_status.run == "1" and doc_status.progress >= 1.0:
+            print("Parsing completed.")
+            break
+        print(f"Parsing progress: {doc_status.progress:.2f}")
+        time.sleep(2)
+        elapsed += 2
+    else:
+        print("Parsing timed out.")
+        # SystemExit is not caught by "except Exception", but the finally
+        # block below still runs, so the dataset is removed before exiting.
+        sys.exit(-1)
+
+    # 4. Add a manual chunk
+    print("Adding a manual chunk...")
+    chunk = doc.add_chunk(content="RAGFlow features a streamlined RAG workflow.")
+    print(f"Added chunk ID: {chunk.id}")
+
+    # 5. List chunks
+    print("Listing chunks...")
+    chunks = doc.list_chunks(page=1, page_size=10)
+    print(f"Total chunks found: {len(chunks)}")
+    for i, c in enumerate(chunks):
+        print(f"Chunk {i}: {c.content[:50]}...")
+
+    # 6. Update a chunk
+    print("Updating chunk...")
+    chunk.update({"content": "RAGFlow features a streamlined and powerful RAG workflow."})
+
+    # 7. Delete the chunk
+    print("Deleting chunk...")
+    doc.delete_chunks([chunk.id])
+
+except Exception as e:
+    print(f"An error occurred: {e}")
+    exit_code = -1
+
+finally:
+    # Remove the dataset on every exit path, including the timeout above.
+    if dataset is not None:
+        print("Cleaning up dataset...")
+        try:
+            rag.delete_datasets(ids=[dataset.id])
+        except Exception as e:
+            print(f"An error occurred during cleanup: {e}")
+            exit_code = -1
+
+if exit_code == 0:
+    print("Chunk example done.")
+sys.exit(exit_code)
diff --git a/example/sdk/retrieval_example.py b/example/sdk/retrieval_example.py
new file mode 100644
index 00000000000..70afa776c4a
--- /dev/null
+++ b/example/sdk/retrieval_example.py
@@ -0,0 +1,121 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""
+The example demonstrates the RAG retrieval flow using the Python SDK.
+It shows how to perform semantic search across one or more datasets.
+
+The server address and API key are read from the RAGFLOW_HOST_ADDRESS and
+RAGFLOW_API_KEY environment variables. The dataset the example creates is
+deleted again before it exits, whether or not an earlier step failed.
+"""
+
+from ragflow_sdk import RAGFlow
+import sys
+import time
+import os
+
+HOST_ADDRESS = os.environ.get("RAGFLOW_HOST_ADDRESS", "http://127.0.0.1")
+API_KEY = os.environ.get("RAGFLOW_API_KEY", "ragflow-IzZmY1MGVhYTBhMjExZWZiYTdjMDI0Mm")
+
+# Handles to the resources created below. They start as None so the cleanup in
+# the finally block can tell which ones actually exist after a failure.
+rag = None
+dataset = None
+exit_code = 0
+
+try:
+    rag = RAGFlow(api_key=API_KEY, base_url=HOST_ADDRESS)
+
+    # 1. Create a dataset
+    print("Creating dataset...")
+    dataset = rag.create_dataset(name="retrieval_example_dataset")
+
+    # 2. Upload and parse a document to have content for retrieval
+    print("Uploading and parsing document...")
+    content = "RAGFlow is an open-source RAG engine based on deep document understanding. It features a streamlined RAG workflow for businesses of any size."
+    docs = dataset.upload_documents([{"display_name": "ragflow_info.txt", "blob": content.encode('utf-8')}])
+    doc = docs[0]
+
+    # Wait for parsing to complete with timeout
+    print("Parsing document...")
+    dataset.async_parse_documents([doc.id])
+    MAX_WAIT = 120  # seconds
+    elapsed = 0
+    while elapsed < MAX_WAIT:
+        doc_status = dataset.list_documents(id=doc.id)[0]
+        # NOTE(review): confirm which value the SDK reports in `run` for a
+        # finished document; the loop only stops early when it equals "1".
+        if doc_status.run == "1" and doc_status.progress >= 1.0:
+            break
+        print(f"Parsing progress: {doc_status.progress:.2f}")
+        time.sleep(2)
+        elapsed += 2
+    else:
+        print("Parsing timed out.")
+        # SystemExit is not caught by "except Exception", but the finally
+        # block below still runs, so the dataset is removed before exiting.
+        sys.exit(-1)
+    print("Document parsed and ready for retrieval.")
+
+    # 3. Perform retrieval (Semantic Search)
+    print("\n--- Performing Retrieval ---")
+    question = "What is RAGFlow?"
+    print(f"Question: {question}")
+
+    # Retrieve relevant chunks from one or more datasets
+    chunks = rag.retrieve(
+        dataset_ids=[dataset.id],
+        question=question,
+        top_k=5,
+        similarity_threshold=0.1
+    )
+
+    print(f"Found {len(chunks)} relevant chunks:")
+    for i, chunk in enumerate(chunks):
+        print(f"\nChunk {i+1}:")
+        print(f"Content: {chunk.content[:200]}...")
+        print(f"Similarity Score: {chunk.similarity:.4f}")
+        print(f"Source Document: {chunk.document_name}")
+
+    # 4. Perform retrieval with additional parameters
+    print("\n--- Performing Retrieval with Keyword Search ---")
+    chunks = rag.retrieve(
+        dataset_ids=[dataset.id],
+        question="workflow for businesses",
+        top_k=3,
+        keyword=True  # Enable keyword search in addition to semantic search
+    )
+    for i, chunk in enumerate(chunks):
+        print(f"Chunk {i+1}: {chunk.content[:100]}... (Score: {chunk.similarity:.4f})")
+
+except Exception as e:
+    print(f"An error occurred: {e}")
+    exit_code = -1
+
+finally:
+    # Remove the dataset on every exit path, including the timeout above.
+    if dataset is not None:
+        print("\nCleaning up...")
+        try:
+            rag.delete_datasets(ids=[dataset.id])
+        except Exception as e:
+            print(f"An error occurred during cleanup: {e}")
+            exit_code = -1
+
+if exit_code == 0:
+    print("Retrieval example done.")
+sys.exit(exit_code)