diff --git a/cmd/api.go b/cmd/api.go
index 86f3ddb..a9443b1 100644
--- a/cmd/api.go
+++ b/cmd/api.go
@@ -2,6 +2,7 @@ package cmd
import (
"context"
+ _ "embed"
"encoding/json"
"fmt"
"net/http"
@@ -25,6 +26,9 @@ import (
"github.com/spf13/viper"
)
+// openapiSpec holds the raw bytes of the openapi.yaml file that sits next to
+// this source file; the go:embed directive below compiles it into the binary.
+// Embed patterns are resolved relative to this package's directory, so the
+// embedded file is cmd/openapi.yaml.
+//
+//go:embed openapi.yaml
+var openapiSpec []byte
+
var apiCmd = &cobra.Command{
Use: "api",
Short: "Start the Distill API server (standalone, no vector DB required)",
@@ -274,6 +278,8 @@ func runAPI(cmd *cobra.Command, args []string) error {
mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
m.Handler().ServeHTTP(w, r)
})
+ mux.HandleFunc("/openapi.yaml", server.handleOpenAPISpec)
+ mux.HandleFunc("/docs", server.handleDocs)
mux.HandleFunc("/", server.handleRoot)
// CORS middleware
@@ -349,17 +355,53 @@ func (s *APIServer) handleRoot(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]interface{}{
"name": "Distill API",
- "version": "1.0.0",
- "docs": "https://distill.siddhantkhare.com/docs",
+ "version": "0.9.0",
+ "docs": "/docs",
+ "openapi": "/openapi.yaml",
"endpoints": map[string]string{
"dedupe": "POST /v1/dedupe",
"dedupe_stream": "POST /v1/dedupe/stream",
+ "pipeline": "POST /v1/pipeline",
+ "memory_store": "POST /v1/memory/store",
+ "memory_recall": "POST /v1/memory/recall",
"health": "GET /health",
"metrics": "GET /metrics",
},
})
}
+// handleOpenAPISpec writes the embedded OpenAPI document to the response.
+// The body is labelled as application/yaml and the response carries a
+// wildcard Access-Control-Allow-Origin header. The request is not inspected.
+func (s *APIServer) handleOpenAPISpec(w http.ResponseWriter, r *http.Request) {
+	headers := w.Header()
+	headers.Set("Content-Type", "application/yaml")
+	headers.Set("Access-Control-Allow-Origin", "*")
+	// The byte count and error from the write are explicitly discarded.
+	_, _ = w.Write(openapiSpec)
+}
+
+// handleDocs serves the page registered for the /docs route.
+// It sets Content-Type to "text/html; charset=utf-8" and writes a fixed page
+// from an inline raw string literal. The request itself is not inspected, and
+// the byte count and error returned by the write are discarded.
+func (s *APIServer) handleDocs(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ _, _ = w.Write([]byte(`
+
+
+ Distill API Docs
+
+
+
+
+
+
+
+
+
+`))
+}
+
func (s *APIServer) handleDedupe(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
diff --git a/cmd/openapi.yaml b/cmd/openapi.yaml
new file mode 100644
index 0000000..d41f7d8
--- /dev/null
+++ b/cmd/openapi.yaml
@@ -0,0 +1,928 @@
+openapi: "3.1.0"
+info:
+ title: Distill API
+ version: 0.9.0
+ description: |
+ Context intelligence layer for LLM agents. Distill deduplicates, compresses,
+ and caches context before it reaches the model, and provides persistent memory
+ with sensitivity tagging and conflict detection.
+ license:
+ name: MIT
+ url: https://github.com/Siddhant-K-code/distill/blob/main/LICENSE
+
+servers:
+ - url: http://localhost:8080
+ description: Local development server
+
+tags:
+ - name: Dedupe
+ description: Semantic deduplication of context chunks
+ - name: Pipeline
+ description: Full dedup + compress + cache pipeline
+ - name: Batch
+ description: Async batch processing
+ - name: Memory
+ description: Persistent context memory store
+ - name: Session
+ description: Stateful context window management
+ - name: Health
+ description: Server health and metrics
+
+paths:
+ /v1/dedupe:
+ post:
+ tags: [Dedupe]
+ summary: Deduplicate chunks
+ description: |
+ Clusters semantically similar chunks and returns one representative per cluster.
+ Supports MMR re-ranking for relevance + diversity balance.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeRequest"
+ responses:
+ "200":
+ description: Deduplicated chunks
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeResponse"
+ "400":
+ description: Invalid request
+
+ /v1/dedupe/stream:
+ post:
+ tags: [Dedupe]
+ summary: Deduplicate chunks (SSE stream)
+ description: |
+ Same as `/v1/dedupe` but returns results as Server-Sent Events with
+ per-stage progress updates.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeRequest"
+ responses:
+ "200":
+ description: SSE stream of dedup progress and results
+ content:
+ text/event-stream: {}
+
+ /v1/pipeline:
+ post:
+ tags: [Pipeline]
+ summary: Run full pipeline
+ description: |
+ Runs the complete dedup → compress → summarize → cache pipeline.
+ Returns processed chunks with per-stage statistics.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/PipelineRequest"
+ responses:
+ "200":
+ description: Pipeline results
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/PipelineResponse"
+ "400":
+ description: Invalid request
+
+ /v1/batch:
+ post:
+ tags: [Batch]
+ summary: Submit batch job
+ description: Submit a batch of chunks for async processing.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchSubmitRequest"
+ responses:
+ "202":
+ description: Job accepted
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchSubmitResponse"
+
+ /v1/batch/{job_id}:
+ get:
+ tags: [Batch]
+ summary: Get batch job status
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Job status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchStatusResponse"
+ "404":
+ description: Job not found
+
+ /v1/batch/{job_id}/results:
+ get:
+ tags: [Batch]
+ summary: Get batch job results
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Job results
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchResultsResponse"
+ "404":
+ description: Job not found
+
+ /v1/memory/store:
+ post:
+ tags: [Memory]
+ summary: Store memories
+ description: |
+ Store one or more memory entries with write-time deduplication.
+ Supports sensitivity tagging (explicit or auto-classified) and
+ conflict detection against existing entries.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/StoreRequest"
+ responses:
+ "200":
+ description: Store result with conflict information
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/StoreResult"
+
+ /v1/memory/recall:
+ post:
+ tags: [Memory]
+ summary: Recall memories
+ description: |
+ Retrieve memories ranked by relevance and recency. Supports tag boosting,
+ task context matching, and minimum relevance filtering.
+ Expired entries are excluded by default.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/RecallRequest"
+ responses:
+ "200":
+ description: Recalled memories with sensitivity metadata
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/RecallResult"
+
+ /v1/memory/forget:
+ post:
+ tags: [Memory]
+ summary: Forget memories
+ description: Permanently remove memories by ID, tag, or age.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForgetRequest"
+ responses:
+ "200":
+ description: Forget result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForgetResult"
+
+ /v1/memory/expire:
+ post:
+ tags: [Memory]
+ summary: Expire memories
+ description: |
+ Mark memories as expired. Expired entries are excluded from recall
+ by default but remain in the store for auditing.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ExpireRequest"
+ responses:
+ "200":
+ description: Expire result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ExpireResult"
+
+ /v1/memory/supersede:
+ post:
+ tags: [Memory]
+ summary: Supersede a memory
+ description: |
+ Mark a memory as superseded by a newer entry. The old entry is expired
+ and a forward pointer to the replacement is stored.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SupersedeRequest"
+ responses:
+ "200":
+ description: Supersede result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SupersedeResult"
+ "404":
+ description: Old entry not found
+ "409":
+ description: Old entry already expired
+
+ /v1/memory/stats:
+ get:
+ tags: [Memory]
+ summary: Memory store statistics
+ responses:
+ "200":
+ description: Store statistics
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/MemoryStats"
+
+ /v1/session/create:
+ post:
+ tags: [Session]
+ summary: Create a session
+ description: Create a new context window session with a token budget.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionCreateRequest"
+ responses:
+ "200":
+ description: Created session
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Session"
+ "409":
+ description: Session already exists
+
+ /v1/session/push:
+ post:
+ tags: [Session]
+ summary: Push entries to a session
+ description: |
+ Add context entries to a session. Distill deduplicates and compresses
+ to stay within the token budget.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionPushRequest"
+ responses:
+ "200":
+ description: Push result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionPushResult"
+ "404":
+ description: Session not found
+ "413":
+ description: Over token budget
+
+ /v1/session/context:
+ post:
+ tags: [Session]
+ summary: Get session context
+ description: Retrieve the current context window for a session.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionContextRequest"
+ responses:
+ "200":
+ description: Session context
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionContextResult"
+ "404":
+ description: Session not found
+
+ /v1/session/get:
+ get:
+ tags: [Session]
+ summary: Get session metadata
+ parameters:
+ - name: session_id
+ in: query
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Session metadata
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Session"
+ "404":
+ description: Session not found
+
+ /v1/session/delete:
+ post:
+ tags: [Session]
+ summary: Delete a session
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required: [session_id]
+ properties:
+ session_id:
+ type: string
+ responses:
+ "200":
+ description: Delete result
+ "404":
+ description: Session not found
+
+ /health:
+ get:
+ tags: [Health]
+ summary: Health check
+ responses:
+ "200":
+ description: Server is healthy
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+ example: ok
+
+ /metrics:
+ get:
+ tags: [Health]
+ summary: Prometheus metrics
+ responses:
+ "200":
+ description: Prometheus-formatted metrics
+ content:
+ text/plain: {}
+
+components:
+ schemas:
+ DedupeChunk:
+ type: object
+ required: [text]
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ score:
+ type: number
+ format: float
+ cache_control:
+ type: string
+ description: Anthropic cache_control marker
+
+ DedupeRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ threshold:
+ type: number
+ format: double
+ description: Cosine distance threshold for clustering
+ lambda:
+ type: number
+ format: double
+ description: MMR lambda (0=diversity, 1=relevance)
+ target_k:
+ type: integer
+ description: Target number of output chunks
+ options:
+ type: object
+ properties:
+ preserve_cache_prefix:
+ type: boolean
+ description: Freeze chunks before the last cache_control marker
+
+ DedupeResponse:
+ type: object
+ properties:
+ chunks:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ score:
+ type: number
+ format: float
+ cluster_id:
+ type: integer
+ cache_control:
+ type: string
+ stats:
+ type: object
+ properties:
+ input_chunks:
+ type: integer
+ output_chunks:
+ type: integer
+ reduction_pct:
+ type: number
+ clusters:
+ type: integer
+ latency_ms:
+ type: number
+
+ PipelineRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ options:
+ type: object
+ properties:
+ dedup:
+ type: boolean
+ compress:
+ type: boolean
+ summarize:
+ type: boolean
+ cache:
+ type: boolean
+
+ PipelineResponse:
+ type: object
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ stats:
+ type: object
+ properties:
+ total_input_tokens:
+ type: integer
+ total_output_tokens:
+ type: integer
+ total_reduction:
+ type: number
+ total_latency_ms:
+ type: number
+ stages:
+ type: object
+ additionalProperties:
+ type: object
+ properties:
+ enabled:
+ type: boolean
+ input_tokens:
+ type: integer
+ output_tokens:
+ type: integer
+ reduction:
+ type: number
+ latency_ms:
+ type: number
+
+ BatchSubmitRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ options:
+ $ref: "#/components/schemas/PipelineRequest/properties/options"
+
+ BatchSubmitResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+
+ BatchStatusResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+ enum: [pending, running, completed, failed]
+ progress:
+ type: number
+ error:
+ type: string
+ created_at:
+ type: string
+ started_at:
+ type: string
+ completed_at:
+ type: string
+
+ BatchResultsResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ stats:
+ $ref: "#/components/schemas/PipelineResponse/properties/stats"
+
+ StoreRequest:
+ type: object
+ required: [entries]
+ properties:
+ session_id:
+ type: string
+ entries:
+ type: array
+ items:
+ type: object
+ required: [text]
+ properties:
+ text:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ source:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ additionalProperties: true
+ expires_at:
+ type: string
+ format: date-time
+ sensitivity:
+ type: integer
+ description: "0=none, 1=pii, 2=internal, 3=credentials"
+ auto_classify:
+ type: boolean
+ description: Run pattern-based sensitivity classification
+
+ StoreResult:
+ type: object
+ properties:
+ stored:
+ type: integer
+ merged:
+ type: integer
+ deduplicated:
+ type: integer
+ total_memories:
+ type: integer
+ conflicts:
+ type: array
+ items:
+ $ref: "#/components/schemas/Conflict"
+
+ Conflict:
+ type: object
+ properties:
+ new_id:
+ type: string
+ new_text:
+ type: string
+ existing_id:
+ type: string
+ existing_text:
+ type: string
+ distance:
+ type: number
+ format: double
+
+ RecallRequest:
+ type: object
+ required: [query]
+ properties:
+ query:
+ type: string
+ query_embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ tags:
+ type: array
+ items:
+ type: string
+ max_tokens:
+ type: integer
+ max_results:
+ type: integer
+ recency_weight:
+ type: number
+ format: double
+ description: Weight for recency vs relevance (0-1)
+ include_expired:
+ type: boolean
+ task_context:
+ type: string
+ description: Task description for source-matching boost
+ boost_tags:
+ type: array
+ items:
+ type: string
+ description: Tags that receive a relevance boost
+ min_relevance:
+ type: number
+ format: double
+ description: Filter out memories below this score (0-1)
+
+ RecallResult:
+ type: object
+ properties:
+ memories:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ source:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ relevance:
+ type: number
+ format: double
+ decay_level:
+ type: integer
+ sensitivity:
+ type: integer
+ last_referenced:
+ type: string
+ format: date-time
+ stats:
+ type: object
+ properties:
+ candidates:
+ type: integer
+ deduplicated:
+ type: integer
+ returned:
+ type: integer
+ token_count:
+ type: integer
+ max_sensitivity:
+ type: integer
+ description: Highest sensitivity level across returned memories
+ sensitive_chunks:
+ type: array
+ items:
+ type: object
+ properties:
+ chunk_id:
+ type: string
+ sensitivity:
+ type: integer
+
+ ForgetRequest:
+ type: object
+ properties:
+ ids:
+ type: array
+ items:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ before:
+ type: string
+ format: date-time
+
+ ForgetResult:
+ type: object
+ properties:
+ forgotten:
+ type: integer
+
+ ExpireRequest:
+ type: object
+ required: [ids]
+ properties:
+ ids:
+ type: array
+ items:
+ type: string
+
+ ExpireResult:
+ type: object
+ properties:
+ expired:
+ type: integer
+
+ SupersedeRequest:
+ type: object
+ required: [old_id]
+ properties:
+ old_id:
+ type: string
+ new_id:
+ type: string
+
+ SupersedeResult:
+ type: object
+ properties:
+ superseded:
+ type: boolean
+
+ MemoryStats:
+ type: object
+ properties:
+ total_memories:
+ type: integer
+ expired_count:
+ type: integer
+ active_count:
+ type: integer
+ by_decay_level:
+ type: object
+ additionalProperties:
+ type: integer
+ by_source:
+ type: object
+ additionalProperties:
+ type: integer
+ oldest_memory:
+ type: string
+ format: date-time
+ newest_memory:
+ type: string
+ format: date-time
+
+ SessionCreateRequest:
+ type: object
+ required: [max_tokens]
+ properties:
+ session_id:
+ type: string
+ description: Auto-generated if empty
+ max_tokens:
+ type: integer
+ dedup_threshold:
+ type: number
+ format: double
+ preserve_recent:
+ type: integer
+ description: Always keep last N entries at full fidelity
+
+ Session:
+ type: object
+ properties:
+ id:
+ type: string
+ max_tokens:
+ type: integer
+ used_tokens:
+ type: integer
+ entry_count:
+ type: integer
+ created_at:
+ type: string
+ format: date-time
+
+ SessionPushRequest:
+ type: object
+ required: [session_id, entries]
+ properties:
+ session_id:
+ type: string
+ entries:
+ type: array
+ items:
+ type: object
+ required: [role, content]
+ properties:
+ role:
+ type: string
+ content:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+
+ SessionPushResult:
+ type: object
+ properties:
+ added:
+ type: integer
+ deduplicated:
+ type: integer
+ compressed:
+ type: integer
+ tokens_used:
+ type: integer
+ tokens_remaining:
+ type: integer
+
+ SessionContextRequest:
+ type: object
+ required: [session_id]
+ properties:
+ session_id:
+ type: string
+ max_tokens:
+ type: integer
+ description: "0 = return full window"
+ role:
+ type: string
+ description: Filter by role
+
+ SessionContextResult:
+ type: object
+ properties:
+ entries:
+ type: array
+ items:
+ type: object
+ properties:
+ role:
+ type: string
+ content:
+ type: string
+ compression_level:
+ type: string
+ tokens:
+ type: integer
+ total_tokens:
+ type: integer
diff --git a/openapi.yaml b/openapi.yaml
new file mode 100644
index 0000000..d41f7d8
--- /dev/null
+++ b/openapi.yaml
@@ -0,0 +1,928 @@
+openapi: "3.1.0"
+info:
+ title: Distill API
+ version: 0.9.0
+ description: |
+ Context intelligence layer for LLM agents. Distill deduplicates, compresses,
+ and caches context before it reaches the model, and provides persistent memory
+ with sensitivity tagging and conflict detection.
+ license:
+ name: MIT
+ url: https://github.com/Siddhant-K-code/distill/blob/main/LICENSE
+
+servers:
+ - url: http://localhost:8080
+ description: Local development server
+
+tags:
+ - name: Dedupe
+ description: Semantic deduplication of context chunks
+ - name: Pipeline
+ description: Full dedup + compress + cache pipeline
+ - name: Batch
+ description: Async batch processing
+ - name: Memory
+ description: Persistent context memory store
+ - name: Session
+ description: Stateful context window management
+ - name: Health
+ description: Server health and metrics
+
+paths:
+ /v1/dedupe:
+ post:
+ tags: [Dedupe]
+ summary: Deduplicate chunks
+ description: |
+ Clusters semantically similar chunks and returns one representative per cluster.
+ Supports MMR re-ranking for relevance + diversity balance.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeRequest"
+ responses:
+ "200":
+ description: Deduplicated chunks
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeResponse"
+ "400":
+ description: Invalid request
+
+ /v1/dedupe/stream:
+ post:
+ tags: [Dedupe]
+ summary: Deduplicate chunks (SSE stream)
+ description: |
+ Same as `/v1/dedupe` but returns results as Server-Sent Events with
+ per-stage progress updates.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DedupeRequest"
+ responses:
+ "200":
+ description: SSE stream of dedup progress and results
+ content:
+ text/event-stream: {}
+
+ /v1/pipeline:
+ post:
+ tags: [Pipeline]
+ summary: Run full pipeline
+ description: |
+ Runs the complete dedup → compress → summarize → cache pipeline.
+ Returns processed chunks with per-stage statistics.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/PipelineRequest"
+ responses:
+ "200":
+ description: Pipeline results
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/PipelineResponse"
+ "400":
+ description: Invalid request
+
+ /v1/batch:
+ post:
+ tags: [Batch]
+ summary: Submit batch job
+ description: Submit a batch of chunks for async processing.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchSubmitRequest"
+ responses:
+ "202":
+ description: Job accepted
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchSubmitResponse"
+
+ /v1/batch/{job_id}:
+ get:
+ tags: [Batch]
+ summary: Get batch job status
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Job status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchStatusResponse"
+ "404":
+ description: Job not found
+
+ /v1/batch/{job_id}/results:
+ get:
+ tags: [Batch]
+ summary: Get batch job results
+ parameters:
+ - name: job_id
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Job results
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/BatchResultsResponse"
+ "404":
+ description: Job not found
+
+ /v1/memory/store:
+ post:
+ tags: [Memory]
+ summary: Store memories
+ description: |
+ Store one or more memory entries with write-time deduplication.
+ Supports sensitivity tagging (explicit or auto-classified) and
+ conflict detection against existing entries.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/StoreRequest"
+ responses:
+ "200":
+ description: Store result with conflict information
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/StoreResult"
+
+ /v1/memory/recall:
+ post:
+ tags: [Memory]
+ summary: Recall memories
+ description: |
+ Retrieve memories ranked by relevance and recency. Supports tag boosting,
+ task context matching, and minimum relevance filtering.
+ Expired entries are excluded by default.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/RecallRequest"
+ responses:
+ "200":
+ description: Recalled memories with sensitivity metadata
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/RecallResult"
+
+ /v1/memory/forget:
+ post:
+ tags: [Memory]
+ summary: Forget memories
+ description: Permanently remove memories by ID, tag, or age.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForgetRequest"
+ responses:
+ "200":
+ description: Forget result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForgetResult"
+
+ /v1/memory/expire:
+ post:
+ tags: [Memory]
+ summary: Expire memories
+ description: |
+ Mark memories as expired. Expired entries are excluded from recall
+ by default but remain in the store for auditing.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ExpireRequest"
+ responses:
+ "200":
+ description: Expire result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ExpireResult"
+
+ /v1/memory/supersede:
+ post:
+ tags: [Memory]
+ summary: Supersede a memory
+ description: |
+ Mark a memory as superseded by a newer entry. The old entry is expired
+ and a forward pointer to the replacement is stored.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SupersedeRequest"
+ responses:
+ "200":
+ description: Supersede result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SupersedeResult"
+ "404":
+ description: Old entry not found
+ "409":
+ description: Old entry already expired
+
+ /v1/memory/stats:
+ get:
+ tags: [Memory]
+ summary: Memory store statistics
+ responses:
+ "200":
+ description: Store statistics
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/MemoryStats"
+
+ /v1/session/create:
+ post:
+ tags: [Session]
+ summary: Create a session
+ description: Create a new context window session with a token budget.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionCreateRequest"
+ responses:
+ "200":
+ description: Created session
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Session"
+ "409":
+ description: Session already exists
+
+ /v1/session/push:
+ post:
+ tags: [Session]
+ summary: Push entries to a session
+ description: |
+ Add context entries to a session. Distill deduplicates and compresses
+ to stay within the token budget.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionPushRequest"
+ responses:
+ "200":
+ description: Push result
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionPushResult"
+ "404":
+ description: Session not found
+ "413":
+ description: Over token budget
+
+ /v1/session/context:
+ post:
+ tags: [Session]
+ summary: Get session context
+ description: Retrieve the current context window for a session.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionContextRequest"
+ responses:
+ "200":
+ description: Session context
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SessionContextResult"
+ "404":
+ description: Session not found
+
+ /v1/session/get:
+ get:
+ tags: [Session]
+ summary: Get session metadata
+ parameters:
+ - name: session_id
+ in: query
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Session metadata
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Session"
+ "404":
+ description: Session not found
+
+ /v1/session/delete:
+ post:
+ tags: [Session]
+ summary: Delete a session
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required: [session_id]
+ properties:
+ session_id:
+ type: string
+ responses:
+ "200":
+ description: Delete result
+ "404":
+ description: Session not found
+
+ /health:
+ get:
+ tags: [Health]
+ summary: Health check
+ responses:
+ "200":
+ description: Server is healthy
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+ example: ok
+
+ /metrics:
+ get:
+ tags: [Health]
+ summary: Prometheus metrics
+ responses:
+ "200":
+ description: Prometheus-formatted metrics
+ content:
+ text/plain: {}
+
+components:
+ schemas:
+ DedupeChunk:
+ type: object
+ required: [text]
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ score:
+ type: number
+ format: float
+ cache_control:
+ type: string
+ description: Anthropic cache_control marker
+
+ DedupeRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ threshold:
+ type: number
+ format: double
+ description: Cosine distance threshold for clustering
+ lambda:
+ type: number
+ format: double
+ description: MMR lambda (0=diversity, 1=relevance)
+ target_k:
+ type: integer
+ description: Target number of output chunks
+ options:
+ type: object
+ properties:
+ preserve_cache_prefix:
+ type: boolean
+ description: Freeze chunks before the last cache_control marker
+
+ DedupeResponse:
+ type: object
+ properties:
+ chunks:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ score:
+ type: number
+ format: float
+ cluster_id:
+ type: integer
+ cache_control:
+ type: string
+ stats:
+ type: object
+ properties:
+ input_chunks:
+ type: integer
+ output_chunks:
+ type: integer
+ reduction_pct:
+ type: number
+ clusters:
+ type: integer
+ latency_ms:
+ type: number
+
+ PipelineRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ options:
+ type: object
+ properties:
+ dedup:
+ type: boolean
+ compress:
+ type: boolean
+ summarize:
+ type: boolean
+ cache:
+ type: boolean
+
+ PipelineResponse:
+ type: object
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ stats:
+ type: object
+ properties:
+ total_input_tokens:
+ type: integer
+ total_output_tokens:
+ type: integer
+ total_reduction:
+ type: number
+ total_latency_ms:
+ type: number
+ stages:
+ type: object
+ additionalProperties:
+ type: object
+ properties:
+ enabled:
+ type: boolean
+ input_tokens:
+ type: integer
+ output_tokens:
+ type: integer
+ reduction:
+ type: number
+ latency_ms:
+ type: number
+
+ BatchSubmitRequest:
+ type: object
+ required: [chunks]
+ properties:
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ options:
+ $ref: "#/components/schemas/PipelineRequest/properties/options"
+
+ BatchSubmitResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+
+ BatchStatusResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+ enum: [pending, running, completed, failed]
+ progress:
+ type: number
+ error:
+ type: string
+ created_at:
+ type: string
+ started_at:
+ type: string
+ completed_at:
+ type: string
+
+ BatchResultsResponse:
+ type: object
+ properties:
+ job_id:
+ type: string
+ status:
+ type: string
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/DedupeChunk"
+ stats:
+ $ref: "#/components/schemas/PipelineResponse/properties/stats"
+
+ StoreRequest:
+ type: object
+ required: [entries]
+ properties:
+ session_id:
+ type: string
+ entries:
+ type: array
+ items:
+ type: object
+ required: [text]
+ properties:
+ text:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ source:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ additionalProperties: true
+ expires_at:
+ type: string
+ format: date-time
+ sensitivity:
+ type: integer
+ description: "0=none, 1=pii, 2=internal, 3=credentials"
+ auto_classify:
+ type: boolean
+ description: Run pattern-based sensitivity classification
+
+ StoreResult:
+ type: object
+ properties:
+ stored:
+ type: integer
+ merged:
+ type: integer
+ deduplicated:
+ type: integer
+ total_memories:
+ type: integer
+ conflicts:
+ type: array
+ items:
+ $ref: "#/components/schemas/Conflict"
+
+ Conflict:
+ type: object
+ properties:
+ new_id:
+ type: string
+ new_text:
+ type: string
+ existing_id:
+ type: string
+ existing_text:
+ type: string
+ distance:
+ type: number
+ format: double
+
+ RecallRequest:
+ type: object
+ required: [query]
+ properties:
+ query:
+ type: string
+ query_embedding:
+ type: array
+ items:
+ type: number
+ format: float
+ tags:
+ type: array
+ items:
+ type: string
+ max_tokens:
+ type: integer
+ max_results:
+ type: integer
+ recency_weight:
+ type: number
+ format: double
+ description: Weight for recency vs relevance (0-1)
+ include_expired:
+ type: boolean
+ task_context:
+ type: string
+ description: Task description for source-matching boost
+ boost_tags:
+ type: array
+ items:
+ type: string
+ description: Tags that receive a relevance boost
+ min_relevance:
+ type: number
+ format: double
+ description: Minimum relevance score (0-1); memories scoring below it are filtered out
+
+ RecallResult:
+ type: object
+ properties:
+ memories:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ text:
+ type: string
+ source:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ relevance:
+ type: number
+ format: double
+ decay_level:
+ type: integer
+ sensitivity:
+ type: integer
+ last_referenced:
+ type: string
+ format: date-time
+ stats:
+ type: object
+ properties:
+ candidates:
+ type: integer
+ deduplicated:
+ type: integer
+ returned:
+ type: integer
+ token_count:
+ type: integer
+ max_sensitivity:
+ type: integer
+ description: Highest sensitivity level across returned memories
+ sensitive_chunks:
+ type: array
+ items:
+ type: object
+ properties:
+ chunk_id:
+ type: string
+ sensitivity:
+ type: integer
+
+ ForgetRequest:
+ type: object
+ properties:
+ ids:
+ type: array
+ items:
+ type: string
+ tags:
+ type: array
+ items:
+ type: string
+ before:
+ type: string
+ format: date-time
+
+ ForgetResult:
+ type: object
+ properties:
+ forgotten:
+ type: integer
+
+ ExpireRequest:
+ type: object
+ required: [ids]
+ properties:
+ ids:
+ type: array
+ items:
+ type: string
+
+ ExpireResult:
+ type: object
+ properties:
+ expired:
+ type: integer
+
+ SupersedeRequest:
+ type: object
+ required: [old_id]
+ properties:
+ old_id:
+ type: string
+ new_id:
+ type: string
+
+ SupersedeResult:
+ type: object
+ properties:
+ superseded:
+ type: boolean
+
+ MemoryStats:
+ type: object
+ properties:
+ total_memories:
+ type: integer
+ expired_count:
+ type: integer
+ active_count:
+ type: integer
+ by_decay_level:
+ type: object
+ additionalProperties:
+ type: integer
+ by_source:
+ type: object
+ additionalProperties:
+ type: integer
+ oldest_memory:
+ type: string
+ format: date-time
+ newest_memory:
+ type: string
+ format: date-time
+
+ SessionCreateRequest:
+ type: object
+ required: [max_tokens]
+ properties:
+ session_id:
+ type: string
+ description: Auto-generated if empty
+ max_tokens:
+ type: integer
+ dedup_threshold:
+ type: number
+ format: double
+ preserve_recent:
+ type: integer
+ description: Always keep last N entries at full fidelity
+
+ Session:
+ type: object
+ properties:
+ id:
+ type: string
+ max_tokens:
+ type: integer
+ used_tokens:
+ type: integer
+ entry_count:
+ type: integer
+ created_at:
+ type: string
+ format: date-time
+
+ SessionPushRequest:
+ type: object
+ required: [session_id, entries]
+ properties:
+ session_id:
+ type: string
+ entries:
+ type: array
+ items:
+ type: object
+ required: [role, content]
+ properties:
+ role:
+ type: string
+ content:
+ type: string
+ embedding:
+ type: array
+ items:
+ type: number
+ format: float
+
+ SessionPushResult:
+ type: object
+ properties:
+ added:
+ type: integer
+ deduplicated:
+ type: integer
+ compressed:
+ type: integer
+ tokens_used:
+ type: integer
+ tokens_remaining:
+ type: integer
+
+ SessionContextRequest:
+ type: object
+ required: [session_id]
+ properties:
+ session_id:
+ type: string
+ max_tokens:
+ type: integer
+ description: "0 = return full window"
+ role:
+ type: string
+ description: Filter by role
+
+ SessionContextResult:
+ type: object
+ properties:
+ entries:
+ type: array
+ items:
+ type: object
+ properties:
+ role:
+ type: string
+ content:
+ type: string
+ compression_level:
+ type: string
+ tokens:
+ type: integer
+ total_tokens:
+ type: integer