@@ -12,6 +12,12 @@ public actor OllamaService {
1212 let stopWords : [ String ]
1313 let keepAlive : String
1414 let format : ResponseFormat
15+ let authenticationMode : AuthenticationMode ?
16+
/// How outgoing requests are authenticated.
///
/// The service stores this as an optional; when it is `nil`, the visible
/// request-building code adds no authentication header at all.
17+ enum AuthenticationMode {
/// Sends the associated string as `Authorization: Bearer <token>`.
18+ case bearerToken( String )
/// Sets the HTTP header field `name` to `value` on the request.
19+ case header( name: String , value: String )
20+ }
1521
1622 public enum ResponseFormat : String {
1723 case none = " "
@@ -21,6 +27,7 @@ public actor OllamaService {
/// Selects which kind of request the service sends.
///
/// The initializer also uses it to pick a fallback URL when no parseable
/// URL string is supplied: `.chatCompletion` falls back to `/api/chat`,
/// while `.completion` and `.completionWithSuffix` fall back to
/// `/api/generate`.
2127 public enum Endpoint {
/// Sends a single prompt string built by `createPrompt(from:)`.
2228 case completion
/// Chat-style request; only its `/api/chat` fallback URL is visible in this excerpt.
2329 case chatCompletion
/// Sends the truncated prefix as the prompt and the truncated suffix in
/// the request body's `suffix` field.
30+ case completionWithSuffix
2431 }
2532
2633 init (
@@ -32,13 +39,14 @@ public actor OllamaService {
3239 temperature: Double = 0.2 ,
3340 stopWords: [ String ] = [ ] ,
3441 keepAlive: String = " " ,
35- format: ResponseFormat = . none
42+ format: ResponseFormat = . none,
43+ authenticationMode: AuthenticationMode ? = nil
3644 ) {
3745 self . url = url. flatMap ( URL . init ( string: ) ) ?? {
3846 switch endpoint {
3947 case . chatCompletion:
4048 URL ( string: " https://127.0.0.1:11434/api/chat " ) !
41- case . completion:
49+ case . completion, . completionWithSuffix :
4250 URL ( string: " https://127.0.0.1:11434/api/generate " ) !
4351 }
4452 } ( )
@@ -51,6 +59,7 @@ public actor OllamaService {
5159 self . keepAlive = keepAlive
5260 self . format = format
5361 self . contextWindow = contextWindow
62+ self . authenticationMode = authenticationMode
5463 }
5564}
5665
@@ -74,7 +83,24 @@ extension OllamaService: CodeCompletionServiceType {
7483 case . completion:
7584 let prompt = createPrompt ( from: request)
7685 CodeCompletionLogger . logger. logPrompt ( [ ( prompt, " user " ) ] )
77- let stream = try await sendPrompt ( prompt)
86+ let stream = try await sendPrompt ( prompt, raw: request. promptIsRaw)
87+ return stream. compactMap { $0. response }
88+ case . completionWithSuffix:
89+ let strategy = DefaultTruncateStrategy ( maxTokenLimit: max (
90+ contextWindow / 3 * 2 ,
91+ contextWindow - maxToken - 20
92+ ) )
93+ let prompts = strategy. createTruncatedPrompt ( promptStrategy: request)
94+
95+ let prefix = prompts. first { $0. role == . prefix } ? . content ?? " "
96+ let suffix = prompts. last { $0. role == . suffix } ? . content ?? " "
97+
98+ CodeCompletionLogger . logger. logPrompt ( [
99+ ( prefix, " prefix " ) ,
100+ ( suffix, " suffix " ) ,
101+ ] )
102+
103+ let stream = try await sendPrompt ( prefix, suffix: suffix)
78104 return stream. compactMap { $0. response }
79105 }
80106 }
@@ -215,6 +241,8 @@ extension OllamaService {
215241 var options : ChatCompletionRequestBody . Options
216242 var keep_alive : String ?
217243 var format : String ?
244+ var raw : Bool ?
245+ var suffix : String ?
218246 }
219247
220248 func createPrompt( from request: PromptStrategy ) -> String {
@@ -227,7 +255,11 @@ extension OllamaService {
227255 . trimmingCharacters ( in: . whitespacesAndNewlines)
228256 }
229257
230- func sendPrompt( _ prompt: String ) async throws -> ResponseStream < ChatCompletionResponseChunk > {
258+ func sendPrompt(
259+ _ prompt: String ,
260+ raw: Bool ? = nil ,
261+ suffix: String ? = nil
262+ ) async throws -> ResponseStream < ChatCompletionResponseChunk > {
231263 let requestBody = CompletionRequestBody (
232264 model: modelName,
233265 prompt: prompt,
@@ -238,14 +270,26 @@ extension OllamaService {
238270 num_predict: maxToken
239271 ) ,
240272 keep_alive: keepAlive. isEmpty ? nil : keepAlive,
241- format: format == . none ? nil : format. rawValue
273+ format: format == . none ? nil : format. rawValue,
274+ raw: raw,
275+ suffix: suffix
242276 )
243277
244278 var request = URLRequest ( url: url)
245279 request. httpMethod = " POST "
246280 let encoder = JSONEncoder ( )
247281 request. httpBody = try encoder. encode ( requestBody)
248282 request. setValue ( " application/json " , forHTTPHeaderField: " Content-Type " )
283+
284+ switch authenticationMode{
285+ case . none:
286+ break
287+ case let . bearerToken( key) :
288+ request. setValue ( " Bearer \( key) " , forHTTPHeaderField: " Authorization " )
289+ case let . header( name, value) :
290+ request. setValue ( value, forHTTPHeaderField: name)
291+ }
292+
249293 let ( result, response) = try await URLSession . shared. bytes ( for: request)
250294
251295 guard let response = response as? HTTPURLResponse else {
0 commit comments