intitni
diff --git a/‎Core/Sources/CodeCompletionService/CodeCompletionService.swift‎
Lines changed: 27 additions & 3 deletions b/‎Core/Sources/CodeCompletionService/CodeCompletionService.swift‎
Lines changed: 27 additions & 3 deletions
diff --git a/‎Core/Sources/CodeCompletionService/OllamaService.swift‎
Lines changed: 259 additions & 0 deletions b/‎Core/Sources/CodeCompletionService/OllamaService.swift‎
Lines changed: 259 additions & 0 deletions
diff --git a/‎Core/Sources/CodeCompletionService/ResponseStream.swift‎
Lines changed: 38 additions & 0 deletions b/‎Core/Sources/CodeCompletionService/ResponseStream.swift‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎Core/Sources/Fundamental/Models/ChatModel.swift‎
Lines changed: 10 additions & 1 deletion b/‎Core/Sources/Fundamental/Models/ChatModel.swift‎
Lines changed: 10 additions & 1 deletion
@@ -3,9 +3,9 @@ import Fundamental
 import Storage
 
 protocol CodeCompletionServiceType {
-    func getCompletion(
-        _ request: PromptStrategy
-    ) async throws -> AsyncStream<String>
+    associatedtype CompletionSequence: AsyncSequence where CompletionSequence.Element == String
+
+    func getCompletion(_ request: PromptStrategy) async throws -> CompletionSequence
 }
 
 extension CodeCompletionServiceType {
@@ -115,6 +115,18 @@ public struct CodeCompletionService {
             let result = try await service.getCompletions(prompt, count: count)
             try Task.checkCancellation()
             return result
+        case .ollama:
+            let service = OllamaService(
+                url: model.endpoint,
+                endpoint: .chatCompletion,
+                modelName: model.info.modelName,
+                stopWords: prompt.stopWords,
+                keepAlive: model.info.ollamaKeepAlive,
+                format: .none
+            )
+            let result = try await service.getCompletions(prompt, count: count)
+            try Task.checkCancellation()
+            return result
         case .unknown:
             throw Error.unknownFormat
         }
@@ -150,6 +162,18 @@ public struct CodeCompletionService {
             let result = try await service.getCompletions(prompt, count: count)
             try Task.checkCancellation()
             return result
+        case .ollama:
+            let service = OllamaService(
+                url: model.endpoint,
+                endpoint: .completion,
+                modelName: model.info.modelName,
+                stopWords: prompt.stopWords,
+                keepAlive: model.info.ollamaKeepAlive,
+                format: .none
+            )
+            let result = try await service.getCompletions(prompt, count: count)
+            try Task.checkCancellation()
+            return result
         case .unknown:
             throw Error.unknownFormat
         }
 
@@ -0,0 +1,259 @@
+import CopilotForXcodeKit
+import Foundation
+import Fundamental
+
+/// A client that requests code completions from an Ollama server through either
+/// its chat endpoint or its plain completion endpoint.
+public actor OllamaService {
+    /// The URL every request is POSTed to.
+    let url: URL
+    /// Which of the two supported APIs `url` is expected to speak.
+    let endpoint: Endpoint
+    /// The model name sent as `model` in the request body.
+    let modelName: String
+    /// Token budget used when truncating the prompt.
+    let maxToken: Int
+    /// Sampling temperature sent in the request options.
+    let temperature: Double
+    /// Stop sequences sent in the request options.
+    let stopWords: [String]
+    /// Value sent as `keep_alive`; omitted from the request when empty.
+    let keepAlive: String
+    /// Value sent as `format`; omitted from the request when `.none`.
+    let format: ResponseFormat
+
+    /// The output format requested from the server.
+    public enum ResponseFormat: String {
+        case none = ""
+        case json = "json"
+    }
+
+    /// The API flavor a service instance talks to.
+    public enum Endpoint {
+        case completion
+        case chatCompletion
+    }
+
+    /// Creates a service.
+    ///
+    /// - Parameters:
+    ///   - url: The full request URL. When it is `nil` or cannot be parsed by
+    ///     `URL(string:)`, a default URL matching `endpoint` is used instead.
+    ///   - endpoint: The API flavor to use.
+    ///   - modelName: The model to request completions from.
+    ///   - maxToken: The token budget for prompt truncation; defaults to 4096 when `nil`.
+    ///   - temperature: The sampling temperature.
+    ///   - stopWords: Stop sequences for generation.
+    ///   - keepAlive: The `keep_alive` value; an empty string leaves it out of the request.
+    ///   - format: The response format; `.none` leaves it out of the request.
+    init(
+        url: String? = nil,
+        endpoint: Endpoint,
+        modelName: String,
+        maxToken: Int? = nil,
+        temperature: Double = 0.2,
+        stopWords: [String] = [],
+        keepAlive: String = "",
+        format: ResponseFormat = .none
+    ) {
+        // Ollama serves plain HTTP on its default port, so the fallback must not use
+        // `https`; a TLS handshake against the default server would fail.
+        self.url = url.flatMap(URL.init(string:)) ?? {
+            switch endpoint {
+            case .chatCompletion:
+                URL(string: "http://127.0.0.1:11434/api/chat")!
+            case .completion:
+                URL(string: "http://127.0.0.1:11434/api/generate")!
+            }
+        }()
+
+        self.endpoint = endpoint
+        self.modelName = modelName
+        self.maxToken = maxToken ?? 4096
+        self.temperature = temperature
+        self.stopWords = stopWords
+        self.keepAlive = keepAlive
+        self.format = format
+    }
+}
+
+extension OllamaService: CodeCompletionServiceType {
+    /// The text fragments extracted from the decoded response chunks; chunks that
+    /// carry no text are dropped.
+    typealias CompletionSequence = AsyncThrowingCompactMapSequence<
+        ResponseStream<OllamaService.ChatCompletionResponseChunk>,
+        String
+    >
+
+    /// Builds the prompt for the configured endpoint, logs it, sends it, and returns
+    /// the generated text as an asynchronous sequence of fragments.
+    ///
+    /// - Parameter request: The prompt strategy describing what to complete.
+    /// - Returns: The generated text fragments, in the order they are received.
+    /// - Throws: Whatever `sendMessages` or `sendPrompt` throws.
+    func getCompletion(
+        _ request: PromptStrategy
+    ) async throws -> CompletionSequence {
+        switch endpoint {
+        case .completion:
+            // The completion endpoint takes one flat prompt string.
+            let text = createPrompt(from: request)
+            CodeCompletionLogger.logger.logPrompt([(text, "user")])
+            let chunks = try await sendPrompt(text)
+            return chunks.compactMap { chunk in chunk.response }
+        case .chatCompletion:
+            // The chat endpoint takes a list of role-tagged messages.
+            let conversation = createMessages(from: request)
+            CodeCompletionLogger.logger.logPrompt(conversation.map { message in
+                (message.content, message.role.rawValue)
+            })
+            let chunks = try await sendMessages(conversation)
+            return chunks.compactMap { chunk in chunk.message?.content }
+        }
+    }
+}
+
+extension OllamaService {
+    /// One role-tagged entry of a chat conversation.
+    struct Message: Codable, Equatable {
+        /// The author of a message.
+        public enum Role: String, Codable {
+            case user
+            case assistant
+            case system
+        }
+
+        /// Who wrote the message.
+        public var role: Role
+        /// The text of the message.
+        public var content: String
+    }
+
+    /// Errors surfaced by the service.
+    enum Error: Swift.Error, LocalizedError {
+        /// Wraps an underlying error; its localized description is forwarded.
+        case decodeError(Swift.Error)
+        /// Carries a free-form message that is used as the description.
+        case otherError(String)
+
+        public var errorDescription: String? {
+            switch self {
+            case .decodeError(let underlying):
+                return underlying.localizedDescription
+            case .otherError(let text):
+                return text
+            }
+        }
+    }
+}
+
+// MARK: - Chat Completion API
+
+/// https://github.com/ollama/ollama/blob/main/docs/api.md#chat-request-streaming
+extension OllamaService {
+    /// The JSON body POSTed for a chat request.
+    struct ChatCompletionRequestBody: Codable {
+        /// Generation options nested under `options`.
+        struct Options: Codable {
+            var temperature: Double
+            var stop: [String]
+            var num_predict: Int
+            var top_k: Int?
+            var top_p: Double?
+        }
+
+        var model: String
+        var messages: [Message]
+        var stream: Bool
+        var options: Options
+        var keep_alive: String?
+        var format: String?
+    }
+
+    /// One decoded line of a streamed response. `message` is read for chat
+    /// responses and `response` for completion responses.
+    struct ChatCompletionResponseChunk: Decodable {
+        var model: String
+        var message: Message?
+        var response: String?
+        var done: Bool
+        var total_duration: Int64?
+        var load_duration: Int64?
+        var prompt_eval_count: Int?
+        var prompt_eval_duration: Int64?
+        var eval_count: Int?
+        var eval_duration: Int64?
+    }
+
+    /// Converts a prompt strategy into chat messages: the system prompt first,
+    /// followed by the truncated prompts with their roles preserved.
+    func createMessages(from request: PromptStrategy) -> [Message] {
+        // The prompt budget is the larger of two thirds of the token limit and
+        // the limit minus 320.
+        let twoThirds = maxToken / 3 * 2
+        let limitMinusReserve = maxToken - 300 - 20
+        let truncation = DefaultTruncateStrategy(
+            maxTokenLimit: max(twoThirds, limitMinusReserve)
+        )
+        let truncated = truncation.createTruncatedPrompt(promptStrategy: request)
+
+        var conversation = [Message(role: .system, content: request.systemPrompt)]
+        for prompt in truncated {
+            switch prompt.role {
+            case .user:
+                conversation.append(Message(role: .user, content: prompt.content))
+            case .assistant:
+                conversation.append(Message(role: .assistant, content: prompt.content))
+            }
+        }
+        return conversation
+    }
+
+    /// POSTs `messages` to `url` as a streaming chat request.
+    ///
+    /// - Returns: A stream that decodes each response line into a chunk.
+    /// - Throws: `CancellationError` when the response is not an HTTP response,
+    ///   `Error.otherError` carrying the response body when the status code is
+    ///   not 200, or any encoding or networking error.
+    func sendMessages(_ messages: [Message]) async throws
+        -> ResponseStream<ChatCompletionResponseChunk>
+    {
+        let generationOptions = ChatCompletionRequestBody.Options(
+            temperature: temperature,
+            stop: stopWords,
+            num_predict: 300
+        )
+        // Empty or `.none` settings are left out of the payload.
+        let keepAliveValue: String? = keepAlive.isEmpty ? nil : keepAlive
+        let formatValue: String? = format == .none ? nil : format.rawValue
+        let payload = ChatCompletionRequestBody(
+            model: modelName,
+            messages: messages,
+            stream: true,
+            options: generationOptions,
+            keep_alive: keepAliveValue,
+            format: formatValue
+        )
+
+        var urlRequest = URLRequest(url: url)
+        urlRequest.httpMethod = "POST"
+        urlRequest.httpBody = try JSONEncoder().encode(payload)
+        urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type")
+        let (bytes, urlResponse) = try await URLSession.shared.bytes(for: urlRequest)
+
+        guard let httpResponse = urlResponse as? HTTPURLResponse else {
+            throw CancellationError()
+        }
+
+        if httpResponse.statusCode != 200 {
+            // Read the whole body so it can be reported as the error message.
+            var body = ""
+            for try await line in bytes.lines {
+                body += line
+            }
+            throw Error.otherError(body)
+        }
+
+        return ResponseStream(result: bytes)
+    }
+}
+
+// MARK: - Completion API
+
+extension OllamaService {
+    /// The JSON body POSTed for a plain completion request.
+    struct CompletionRequestBody: Codable {
+        var model: String
+        var prompt: String
+        var stream: Bool
+        var options: ChatCompletionRequestBody.Options
+        var keep_alive: String?
+        var format: String?
+    }
+
+    /// Flattens a prompt strategy into one string: the system prompt followed by
+    /// the truncated prompts, separated by blank lines.
+    func createPrompt(from request: PromptStrategy) -> String {
+        // The prompt budget is the larger of two thirds of the token limit and
+        // the limit minus 320.
+        let twoThirds = maxToken / 3 * 2
+        let limitMinusReserve = maxToken - 300 - 20
+        let truncation = DefaultTruncateStrategy(
+            maxTokenLimit: max(twoThirds, limitMinusReserve)
+        )
+        let truncated = truncation.createTruncatedPrompt(promptStrategy: request)
+
+        var sections = [request.systemPrompt]
+        sections.append(contentsOf: truncated.map { $0.content })
+        return sections.joined(separator: "\n\n")
+    }
+
+    /// POSTs `prompt` to `url` as a streaming completion request.
+    ///
+    /// - Returns: A stream that decodes each response line into a chunk.
+    /// - Throws: `CancellationError` when the response is not an HTTP response,
+    ///   `Error.otherError` carrying the response body when the status code is
+    ///   not 200, or any encoding or networking error.
+    func sendPrompt(_ prompt: String) async throws -> ResponseStream<ChatCompletionResponseChunk> {
+        let generationOptions = ChatCompletionRequestBody.Options(
+            temperature: temperature,
+            stop: stopWords,
+            num_predict: 300
+        )
+        // Empty or `.none` settings are left out of the payload.
+        let keepAliveValue: String? = keepAlive.isEmpty ? nil : keepAlive
+        let formatValue: String? = format == .none ? nil : format.rawValue
+        let payload = CompletionRequestBody(
+            model: modelName,
+            prompt: prompt,
+            stream: true,
+            options: generationOptions,
+            keep_alive: keepAliveValue,
+            format: formatValue
+        )
+
+        var urlRequest = URLRequest(url: url)
+        urlRequest.httpMethod = "POST"
+        urlRequest.httpBody = try JSONEncoder().encode(payload)
+        urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type")
+        let (bytes, urlResponse) = try await URLSession.shared.bytes(for: urlRequest)
+
+        guard let httpResponse = urlResponse as? HTTPURLResponse else {
+            throw CancellationError()
+        }
+
+        if httpResponse.statusCode != 200 {
+            // Read the whole body so it can be reported as the error message.
+            var body = ""
+            for try await line in bytes.lines {
+                body += line
+            }
+            throw Error.otherError(body)
+        }
+
+        return ResponseStream(result: bytes)
+    }
+
+    /// Returns the number of characters in the message content.
+    func countToken(_ message: Message) -> Int {
+        let characters = message.content.count
+        return characters
+    }
+}
+
@@ -0,0 +1,38 @@
+import Foundation
+
+/// An asynchronous sequence that decodes each line of a streamed HTTP response
+/// body as a JSON-encoded `Chunk`.
+struct ResponseStream<Chunk: Decodable>: AsyncSequence {
+    func makeAsyncIterator() -> Stream.AsyncIterator {
+        stream.makeAsyncIterator()
+    }
+
+    typealias Stream = AsyncThrowingStream<Chunk, Error>
+    typealias AsyncIterator = Stream.AsyncIterator
+    typealias Element = Chunk
+
+    /// The stream the decoded chunks are yielded into.
+    let stream: Stream
+
+    /// Starts reading `result` line by line and decoding each line.
+    ///
+    /// - Parameters:
+    ///   - result: The response bytes to read.
+    ///   - lineExtractor: Maps a raw line to the JSON text to decode. Returning
+    ///     `nil` skips the line. The default passes the line through unchanged.
+    init(result: URLSession.AsyncBytes, lineExtractor: @escaping (String) -> String? = { $0 }) {
+        stream = AsyncThrowingStream<Chunk, Error> { continuation in
+            let task = Task {
+                // One decoder serves the whole stream instead of one per line.
+                let decoder = JSONDecoder()
+                do {
+                    for try await line in result.lines {
+                        if Task.isCancelled { break }
+                        guard let content = lineExtractor(line)?.data(using: .utf8)
+                        else { continue }
+                        let chunk = try decoder.decode(Chunk.self, from: content)
+                        continuation.yield(chunk)
+                    }
+                    continuation.finish()
+                } catch {
+                    // A read or decode failure ends the stream with that error and
+                    // cancels the underlying URL session task.
+                    continuation.finish(throwing: error)
+                    result.task.cancel()
+                }
+            }
+            // When the stream terminates, stop both the reader task and the
+            // underlying URL session task.
+            continuation.onTermination = { _ in
+                task.cancel()
+                result.task.cancel()
+            }
+        }
+    }
+}
+
@@ -22,6 +22,7 @@ public struct ChatModel: Codable, Equatable, Identifiable {
         case azureOpenAI
         case openAICompatible
         case googleAI
+        case ollama
 
         case unknown
     }
@@ -45,6 +46,8 @@ public struct ChatModel: Codable, Equatable, Identifiable {
             get { modelName }
             set { modelName = newValue }
         }
+        @FallbackDecoding<EmptyString>
+        public var ollamaKeepAlive: String
 
         public init(
             apiKeyName: String = "",
@@ -53,7 +56,8 @@ public struct ChatModel: Codable, Equatable, Identifiable {
             maxTokens: Int = 4000,
             supportsFunctionCalling: Bool = true,
             supportsOpenAIAPI2023_11: Bool = false,
-            modelName: String = ""
+            modelName: String = "",
+            ollamaKeepAlive: String = ""
         ) {
             self.apiKeyName = apiKeyName
             self.baseURL = baseURL
@@ -62,6 +66,7 @@ public struct ChatModel: Codable, Equatable, Identifiable {
             self.supportsFunctionCalling = supportsFunctionCalling
             self.supportsOpenAIAPI2023_11 = supportsOpenAIAPI2023_11
             self.modelName = modelName
+            self.ollamaKeepAlive = ollamaKeepAlive
         }
     }
 
@@ -86,6 +91,10 @@ public struct ChatModel: Codable, Equatable, Identifiable {
             let baseURL = info.baseURL
             if baseURL.isEmpty { return "https://generativelanguage.googleapis.com/v1" }
             return "\(baseURL)/v1/chat/completions"
+        case .ollama:
+            let baseURL = info.baseURL
+            if baseURL.isEmpty { return "http://localhost:11434/api/chat" }
+            return "\(baseURL)/api/chat"
         case .unknown:
             return ""
         }