Ollama4j
A Java library (wrapper/binding) for Ollama server.
Loading...
Searching...
No Matches
Ollama.java
Go to the documentation of this file.
1/*
2 * Ollama4j - Java library for interacting with Ollama server.
3 * Copyright (c) 2025 Amith Koujalgi and contributors.
4 *
5 * Licensed under the MIT License (the "License");
6 * you may not use this file except in compliance with the License.
7 *
8*/
9package io.github.ollama4j;
10
11import com.fasterxml.jackson.databind.ObjectMapper;
12import io.github.ollama4j.exceptions.OllamaException;
13import io.github.ollama4j.exceptions.RoleNotFoundException;
14import io.github.ollama4j.exceptions.ToolInvocationException;
15import io.github.ollama4j.metrics.MetricsRecorder;
16import io.github.ollama4j.models.chat.*;
17import io.github.ollama4j.models.chat.OllamaChatTokenHandler;
18import io.github.ollama4j.models.embed.OllamaEmbedRequest;
19import io.github.ollama4j.models.embed.OllamaEmbedResult;
20import io.github.ollama4j.models.generate.OllamaGenerateRequest;
21import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver;
22import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;
23import io.github.ollama4j.models.ps.ModelProcessesResult;
24import io.github.ollama4j.models.request.*;
25import io.github.ollama4j.models.response.*;
26import io.github.ollama4j.tools.*;
27import io.github.ollama4j.tools.annotations.OllamaToolService;
28import io.github.ollama4j.tools.annotations.ToolProperty;
29import io.github.ollama4j.tools.annotations.ToolSpec;
30import io.github.ollama4j.utils.Constants;
31import io.github.ollama4j.utils.Utils;
32import java.io.*;
33import java.lang.reflect.InvocationTargetException;
34import java.lang.reflect.Method;
35import java.lang.reflect.Parameter;
36import java.net.URI;
37import java.net.URISyntaxException;
38import java.net.http.HttpClient;
39import java.net.http.HttpRequest;
40import java.net.http.HttpResponse;
41import java.nio.charset.StandardCharsets;
42import java.nio.file.Files;
43import java.time.Duration;
44import java.util.*;
45import java.util.stream.Collectors;
46import lombok.Setter;
47import org.slf4j.Logger;
48import org.slf4j.LoggerFactory;
49
55@SuppressWarnings({"DuplicatedCode", "resource", "SpellCheckingInspection"})
56public class Ollama {
57
/** Logger shared by every method of this class. */
58 private static final Logger LOG = LoggerFactory.getLogger(Ollama.class);
59
/** Server base address; every endpoint path is appended to it when a request URI is built. */
60 private final String host;
/** Optional credentials; when non-null, getRequestBuilderDefault adds an Authorization header. */
61 private Auth auth;
62
/** Registry behind registerTool / registerTools / deregisterTools. */
63 private final ToolRegistry toolRegistry = new ToolRegistry();
64
/** Timeout in seconds used by getRequestBuilderDefault and handed to the endpoint callers. */
71 @Setter private long requestTimeoutSeconds = 10;
72
/** NOTE(review): not read anywhere in the visible part of this class; presumably an image-URL read timeout. */
76 @Setter private int imageURLReadTimeoutSeconds = 10;
77
/** NOTE(review): not read anywhere in the visible part of this class; presumably an image-URL connect timeout. */
81 @Setter private int imageURLConnectTimeoutSeconds = 10;
82
/** Upper bound on the tool-call follow-up rounds performed by the chat loop. */
89 @Setter private int maxChatToolCallRetries = 3;
90
/** Number of attempts pullModel makes; 0 means a single attempt with no retry handling. */
99 @Setter
100 @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
101 private int numberOfRetriesForModelPull = 0;
102
/** NOTE(review): not read anywhere in the visible part of this class; presumably toggles metrics recording. */
109 @Setter private boolean metricsEnabled = false;
110
114 public Ollama() {
115 this.host = "http://localhost:11434";
116 }
117
123 public Ollama(String host) {
124 if (host.endsWith("/")) {
125 this.host = host.substring(0, host.length() - 1);
126 } else {
127 this.host = host;
128 }
129 LOG.info("Ollama4j client initialized. Connected to Ollama server at: {}", this.host);
130 }
131
138 public void setBasicAuth(String username, String password) {
139 this.auth = new BasicAuth(username, password);
140 }
141
147 public void setBearerAuth(String bearerToken) {
148 this.auth = new BearerAuth(bearerToken);
149 }
150
157 public boolean ping() throws OllamaException {
158 long startTime = System.currentTimeMillis();
159 String url = "/api/tags";
160 int statusCode = -1;
161 Object out = null;
162 try {
163 HttpClient httpClient = HttpClient.newHttpClient();
164 HttpRequest httpRequest;
165 HttpResponse<String> response;
166 httpRequest =
167 getRequestBuilderDefault(new URI(this.host + url))
168 .header(
169 Constants.HttpConstants.HEADER_KEY_ACCEPT,
170 Constants.HttpConstants.APPLICATION_JSON)
171 .header(
172 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
173 Constants.HttpConstants.APPLICATION_JSON)
174 .GET()
175 .build();
176 response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
177 statusCode = response.statusCode();
178 return statusCode == 200;
179 } catch (InterruptedException ie) {
180 Thread.currentThread().interrupt();
181 throw new OllamaException("Ping interrupted", ie);
182 } catch (Exception e) {
183 throw new OllamaException("Ping failed", e);
184 } finally {
186 url, "", false, false, false, null, null, startTime, statusCode, out);
187 }
188 }
189
197 long startTime = System.currentTimeMillis();
198 String url = "/api/ps";
199 int statusCode = -1;
200 Object out = null;
201 try {
202 HttpClient httpClient = HttpClient.newHttpClient();
203 HttpRequest httpRequest = null;
204 try {
205 httpRequest =
206 getRequestBuilderDefault(new URI(this.host + url))
207 .header(
208 Constants.HttpConstants.HEADER_KEY_ACCEPT,
209 Constants.HttpConstants.APPLICATION_JSON)
210 .header(
211 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
212 Constants.HttpConstants.APPLICATION_JSON)
213 .GET()
214 .build();
215 } catch (URISyntaxException e) {
216 throw new OllamaException(e.getMessage(), e);
217 }
218 HttpResponse<String> response = null;
219 response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
220 statusCode = response.statusCode();
221 String responseString = response.body();
222 if (statusCode == 200) {
223 return Utils.getObjectMapper()
224 .readValue(responseString, ModelProcessesResult.class);
225 } else {
226 throw new OllamaException(statusCode + " - " + responseString);
227 }
228 } catch (InterruptedException ie) {
229 Thread.currentThread().interrupt();
230 throw new OllamaException("ps interrupted", ie);
231 } catch (Exception e) {
232 throw new OllamaException("ps failed", e);
233 } finally {
235 url, "", false, false, false, null, null, startTime, statusCode, out);
236 }
237 }
238
245 public List<Model> listModels() throws OllamaException {
246 long startTime = System.currentTimeMillis();
247 String url = "/api/tags";
248 int statusCode = -1;
249 Object out = null;
250 try {
251 HttpClient httpClient = HttpClient.newHttpClient();
252 HttpRequest httpRequest =
253 getRequestBuilderDefault(new URI(this.host + url))
254 .header(
255 Constants.HttpConstants.HEADER_KEY_ACCEPT,
256 Constants.HttpConstants.APPLICATION_JSON)
257 .header(
258 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
259 Constants.HttpConstants.APPLICATION_JSON)
260 .GET()
261 .build();
262 HttpResponse<String> response =
263 httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
264 statusCode = response.statusCode();
265 String responseString = response.body();
266 if (statusCode == 200) {
267 return Utils.getObjectMapper()
268 .readValue(responseString, ListModelsResponse.class)
269 .getModels();
270 } else {
271 throw new OllamaException(statusCode + " - " + responseString);
272 }
273 } catch (InterruptedException ie) {
274 Thread.currentThread().interrupt();
275 throw new OllamaException("listModels interrupted", ie);
276 } catch (Exception e) {
277 throw new OllamaException(e.getMessage(), e);
278 } finally {
280 url, "", false, false, false, null, null, startTime, statusCode, out);
281 }
282 }
283
293 private void handlePullRetry(
294 String modelName, int currentRetry, int maxRetries, long baseDelayMillis)
295 throws InterruptedException {
296 int attempt = currentRetry + 1;
297 if (attempt < maxRetries) {
298 long backoffMillis = baseDelayMillis * (1L << currentRetry);
299 LOG.error(
300 "Failed to pull model {}, retrying in {}s... (attempt {}/{})",
301 modelName,
302 backoffMillis / 1000,
303 attempt,
304 maxRetries);
305 try {
306 Thread.sleep(backoffMillis);
307 } catch (InterruptedException ie) {
308 Thread.currentThread().interrupt();
309 throw ie;
310 }
311 } else {
312 LOG.error(
313 "Failed to pull model {} after {} attempts, no more retries.",
314 modelName,
315 maxRetries);
316 }
317 }
318
325 private void doPullModel(String modelName) throws OllamaException {
326 long startTime = System.currentTimeMillis();
327 String url = "/api/pull";
328 int statusCode = -1;
329 Object out = null;
330 try {
331 String jsonData = new ModelRequest(modelName).toString();
332 HttpRequest request =
333 getRequestBuilderDefault(new URI(this.host + url))
334 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
335 .header(
336 Constants.HttpConstants.HEADER_KEY_ACCEPT,
337 Constants.HttpConstants.APPLICATION_JSON)
338 .header(
339 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
340 Constants.HttpConstants.APPLICATION_JSON)
341 .build();
342 HttpClient client = HttpClient.newHttpClient();
343 HttpResponse<InputStream> response =
344 client.send(request, HttpResponse.BodyHandlers.ofInputStream());
345 statusCode = response.statusCode();
346 InputStream responseBodyStream = response.body();
347 String responseString = "";
348 boolean success = false; // Flag to check the pull success.
349
350 try (BufferedReader reader =
351 new BufferedReader(
352 new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
353 String line;
354 while ((line = reader.readLine()) != null) {
355 ModelPullResponse modelPullResponse =
356 Utils.getObjectMapper().readValue(line, ModelPullResponse.class);
357 success = processModelPullResponse(modelPullResponse, modelName) || success;
358 }
359 }
360 if (!success) {
361 LOG.error("Model pull failed or returned invalid status.");
362 throw new OllamaException("Model pull failed or returned invalid status.");
363 }
364 if (statusCode != 200) {
365 throw new OllamaException(statusCode + " - " + responseString);
366 }
367 } catch (InterruptedException ie) {
368 Thread.currentThread().interrupt();
369 throw new OllamaException("Thread was interrupted during model pull.", ie);
370 } catch (Exception e) {
371 throw new OllamaException(e.getMessage(), e);
372 } finally {
374 url, "", false, false, false, null, null, startTime, statusCode, out);
375 }
376 }
377
387 @SuppressWarnings("RedundantIfStatement")
388 private boolean processModelPullResponse(ModelPullResponse modelPullResponse, String modelName)
389 throws OllamaException {
390 if (modelPullResponse == null) {
391 LOG.error("Received null response for model pull.");
392 return false;
393 }
394 String error = modelPullResponse.getError();
395 if (error != null && !error.trim().isEmpty()) {
396 throw new OllamaException("Model pull failed: " + error);
397 }
398 String status = modelPullResponse.getStatus();
399 if (status != null) {
400 LOG.debug("{}: {}", modelName, status);
401 if ("success".equalsIgnoreCase(status)) {
402 return true;
403 }
404 }
405 return false;
406 }
407
414 public String getVersion() throws OllamaException {
415 String url = "/api/version";
416 long startTime = System.currentTimeMillis();
417 int statusCode = -1;
418 Object out = null;
419 try {
420 HttpClient httpClient = HttpClient.newHttpClient();
421 HttpRequest httpRequest =
422 getRequestBuilderDefault(new URI(this.host + url))
423 .header(
424 Constants.HttpConstants.HEADER_KEY_ACCEPT,
425 Constants.HttpConstants.APPLICATION_JSON)
426 .header(
427 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
428 Constants.HttpConstants.APPLICATION_JSON)
429 .GET()
430 .build();
431 HttpResponse<String> response =
432 httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
433 statusCode = response.statusCode();
434 String responseString = response.body();
435 if (statusCode == 200) {
436 return Utils.getObjectMapper()
437 .readValue(responseString, OllamaVersion.class)
438 .getVersion();
439 } else {
440 throw new OllamaException(statusCode + " - " + responseString);
441 }
442 } catch (InterruptedException ie) {
443 Thread.currentThread().interrupt();
444 throw new OllamaException("Thread was interrupted", ie);
445 } catch (Exception e) {
446 throw new OllamaException(e.getMessage(), e);
447 } finally {
449 url, "", false, false, false, null, null, startTime, statusCode, out);
450 }
451 }
452
461 public void pullModel(String modelName) throws OllamaException {
462 try {
463 if (numberOfRetriesForModelPull == 0) {
464 this.doPullModel(modelName);
465 return;
466 }
467 int numberOfRetries = 0;
468 long baseDelayMillis = 3000L; // 3 seconds base delay
469 while (numberOfRetries < numberOfRetriesForModelPull) {
470 try {
471 this.doPullModel(modelName);
472 return;
473 } catch (OllamaException e) {
474 handlePullRetry(
475 modelName,
476 numberOfRetries,
477 numberOfRetriesForModelPull,
478 baseDelayMillis);
479 numberOfRetries++;
480 }
481 }
482 throw new OllamaException(
483 "Failed to pull model "
484 + modelName
485 + " after "
486 + numberOfRetriesForModelPull
487 + " retries");
488 } catch (InterruptedException ie) {
489 Thread.currentThread().interrupt();
490 throw new OllamaException("Thread was interrupted", ie);
491 } catch (Exception e) {
492 throw new OllamaException(e.getMessage(), e);
493 }
494 }
495
503 public ModelDetail getModelDetails(String modelName) throws OllamaException {
504 long startTime = System.currentTimeMillis();
505 String url = "/api/show";
506 int statusCode = -1;
507 Object out = null;
508 try {
509 String jsonData = new ModelRequest(modelName).toString();
510 HttpRequest request =
511 getRequestBuilderDefault(new URI(this.host + url))
512 .header(
513 Constants.HttpConstants.HEADER_KEY_ACCEPT,
514 Constants.HttpConstants.APPLICATION_JSON)
515 .header(
516 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
517 Constants.HttpConstants.APPLICATION_JSON)
518 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
519 .build();
520 HttpClient client = HttpClient.newHttpClient();
521 HttpResponse<String> response =
522 client.send(request, HttpResponse.BodyHandlers.ofString());
523 statusCode = response.statusCode();
524 String responseBody = response.body();
525 if (statusCode == 200) {
526 return Utils.getObjectMapper().readValue(responseBody, ModelDetail.class);
527 } else {
528 throw new OllamaException(statusCode + " - " + responseBody);
529 }
530 } catch (InterruptedException ie) {
531 Thread.currentThread().interrupt();
532 throw new OllamaException("Thread was interrupted", ie);
533 } catch (Exception e) {
534 throw new OllamaException(e.getMessage(), e);
535 } finally {
537 url, "", false, false, false, null, null, startTime, statusCode, out);
538 }
539 }
540
548 public void createModel(CustomModelRequest customModelRequest) throws OllamaException {
549 long startTime = System.currentTimeMillis();
550 String url = "/api/create";
551 int statusCode = -1;
552 Object out = null;
553 try {
554 String jsonData = customModelRequest.toString();
555 HttpRequest request =
556 getRequestBuilderDefault(new URI(this.host + url))
557 .header(
558 Constants.HttpConstants.HEADER_KEY_ACCEPT,
559 Constants.HttpConstants.APPLICATION_JSON)
560 .header(
561 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
562 Constants.HttpConstants.APPLICATION_JSON)
563 .POST(
564 HttpRequest.BodyPublishers.ofString(
565 jsonData, StandardCharsets.UTF_8))
566 .build();
567 HttpClient client = HttpClient.newHttpClient();
568 HttpResponse<InputStream> response =
569 client.send(request, HttpResponse.BodyHandlers.ofInputStream());
570 statusCode = response.statusCode();
571 if (statusCode != 200) {
572 String errorBody =
573 new String(response.body().readAllBytes(), StandardCharsets.UTF_8);
574 out = errorBody;
575 throw new OllamaException(statusCode + " - " + errorBody);
576 }
577 try (BufferedReader reader =
578 new BufferedReader(
579 new InputStreamReader(response.body(), StandardCharsets.UTF_8))) {
580 String line;
581 StringBuilder lines = new StringBuilder();
582 while ((line = reader.readLine()) != null) {
584 Utils.getObjectMapper().readValue(line, ModelPullResponse.class);
585 lines.append(line);
586 LOG.debug(res.getStatus());
587 if (res.getError() != null) {
588 out = res.getError();
589 throw new OllamaException(res.getError());
590 }
591 }
592 out = lines;
593 }
594 } catch (InterruptedException e) {
595 Thread.currentThread().interrupt();
596 throw new OllamaException("Thread was interrupted", e);
597 } catch (Exception e) {
598 throw new OllamaException(e.getMessage(), e);
599 } finally {
601 url, "", false, false, false, null, null, startTime, statusCode, out);
602 }
603 }
604
612 public void deleteModel(String modelName, boolean ignoreIfNotPresent) throws OllamaException {
613 long startTime = System.currentTimeMillis();
614 String url = "/api/delete";
615 int statusCode = -1;
616 Object out = null;
617 try {
618 String jsonData = new ModelRequest(modelName).toString();
619 HttpRequest request =
620 getRequestBuilderDefault(new URI(this.host + url))
621 .method(
622 "DELETE",
623 HttpRequest.BodyPublishers.ofString(
624 jsonData, StandardCharsets.UTF_8))
625 .header(
626 Constants.HttpConstants.HEADER_KEY_ACCEPT,
627 Constants.HttpConstants.APPLICATION_JSON)
628 .header(
629 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
630 Constants.HttpConstants.APPLICATION_JSON)
631 .build();
632 HttpClient client = HttpClient.newHttpClient();
633 HttpResponse<String> response =
634 client.send(request, HttpResponse.BodyHandlers.ofString());
635 statusCode = response.statusCode();
636 String responseBody = response.body();
637 out = responseBody;
638 if (statusCode == 404
639 && responseBody.contains("model")
640 && responseBody.contains("not found")) {
641 return;
642 }
643 if (statusCode != 200) {
644 throw new OllamaException(statusCode + " - " + responseBody);
645 }
646 } catch (InterruptedException e) {
647 Thread.currentThread().interrupt();
648 throw new OllamaException("Thread was interrupted", e);
649 } catch (Exception e) {
650 throw new OllamaException(statusCode + " - " + out, e);
651 } finally {
653 url, "", false, false, false, null, null, startTime, statusCode, out);
654 }
655 }
656
666 public void unloadModel(String modelName) throws OllamaException {
667 long startTime = System.currentTimeMillis();
668 String url = "/api/generate";
669 int statusCode = -1;
670 Object out = null;
671 try {
672 ObjectMapper objectMapper = new ObjectMapper();
673 Map<String, Object> jsonMap = new java.util.HashMap<>();
674 jsonMap.put("model", modelName);
675 jsonMap.put("keep_alive", 0);
676 String jsonData = objectMapper.writeValueAsString(jsonMap);
677 HttpRequest request =
678 getRequestBuilderDefault(new URI(this.host + url))
679 .method(
680 "POST",
681 HttpRequest.BodyPublishers.ofString(
682 jsonData, StandardCharsets.UTF_8))
683 .header(
684 Constants.HttpConstants.HEADER_KEY_ACCEPT,
685 Constants.HttpConstants.APPLICATION_JSON)
686 .header(
687 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
688 Constants.HttpConstants.APPLICATION_JSON)
689 .build();
690 LOG.debug("Unloading model with request: {}", jsonData);
691 HttpClient client = HttpClient.newHttpClient();
692 HttpResponse<String> response =
693 client.send(request, HttpResponse.BodyHandlers.ofString());
694 statusCode = response.statusCode();
695 String responseBody = response.body();
696 if (statusCode == 404
697 && responseBody.contains("model")
698 && responseBody.contains("not found")) {
699 LOG.debug("Unload response: {} - {}", statusCode, responseBody);
700 return;
701 }
702 if (statusCode != 200) {
703 LOG.debug("Unload response: {} - {}", statusCode, responseBody);
704 throw new OllamaException(statusCode + " - " + responseBody);
705 }
706 } catch (InterruptedException e) {
707 Thread.currentThread().interrupt();
708 LOG.debug("Unload interrupted: {} - {}", statusCode, out);
709 throw new OllamaException(statusCode + " - " + out, e);
710 } catch (Exception e) {
711 LOG.debug("Unload failed: {} - {}", statusCode, out);
712 throw new OllamaException(statusCode + " - " + out, e);
713 } finally {
715 url, "", false, false, false, null, null, startTime, statusCode, out);
716 }
717 }
718
727 long startTime = System.currentTimeMillis();
728 String url = "/api/embed";
729 int statusCode = -1;
730 Object out = null;
731 try {
732 String jsonData = Utils.getObjectMapper().writeValueAsString(modelRequest);
733 HttpClient httpClient = HttpClient.newHttpClient();
734 HttpRequest request =
735 HttpRequest.newBuilder(new URI(this.host + url))
736 .header(
737 Constants.HttpConstants.HEADER_KEY_ACCEPT,
738 Constants.HttpConstants.APPLICATION_JSON)
739 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
740 .build();
741 HttpResponse<String> response =
742 httpClient.send(request, HttpResponse.BodyHandlers.ofString());
743 statusCode = response.statusCode();
744 String responseBody = response.body();
745 if (statusCode == 200) {
746 return Utils.getObjectMapper().readValue(responseBody, OllamaEmbedResult.class);
747 } else {
748 throw new OllamaException(statusCode + " - " + responseBody);
749 }
750 } catch (InterruptedException e) {
751 Thread.currentThread().interrupt();
752 throw new OllamaException("Thread was interrupted", e);
753 } catch (Exception e) {
754 throw new OllamaException(e.getMessage(), e);
755 } finally {
757 url, "", false, false, false, null, null, startTime, statusCode, out);
758 }
759 }
760
772 throws OllamaException {
773 try {
774 if (request.isUseTools()) {
775 return generateWithToolsInternal(request, streamObserver);
776 }
777
778 if (streamObserver != null) {
779 if (request.isThink()) {
780 return generateSyncForOllamaRequestModel(
781 request,
782 streamObserver.getThinkingStreamHandler(),
783 streamObserver.getResponseStreamHandler());
784 } else {
785 return generateSyncForOllamaRequestModel(
786 request, null, streamObserver.getResponseStreamHandler());
787 }
788 }
789 return generateSyncForOllamaRequestModel(request, null, null);
790 } catch (Exception e) {
791 throw new OllamaException(e.getMessage(), e);
792 }
793 }
794
795 // (No javadoc for private helper, as is standard)
796 private OllamaResult generateWithToolsInternal(
798 throws OllamaException {
799 ArrayList<OllamaChatMessage> msgs = new ArrayList<>();
800 OllamaChatRequest chatRequest = new OllamaChatRequest();
801 chatRequest.setModel(request.getModel());
803 ocm.setRole(OllamaChatMessageRole.USER);
804 ocm.setResponse(request.getPrompt());
805 chatRequest.setMessages(msgs);
806 msgs.add(ocm);
807 OllamaChatTokenHandler hdlr = null;
808 chatRequest.setUseTools(true);
809 chatRequest.setTools(request.getTools());
810 if (streamObserver != null) {
811 chatRequest.setStream(true);
812 if (streamObserver.getResponseStreamHandler() != null) {
813 hdlr =
814 chatResponseModel ->
815 streamObserver
816 .getResponseStreamHandler()
817 .accept(chatResponseModel.getMessage().getResponse());
818 }
819 }
820 OllamaChatResult res = chat(chatRequest, hdlr);
821 return new OllamaResult(
822 res.getResponseModel().getMessage().getResponse(),
823 res.getResponseModel().getMessage().getThinking(),
824 res.getResponseModel().getTotalDuration(),
825 -1);
826 }
827
839 String model, String prompt, boolean raw, boolean think) throws OllamaException {
840 long startTime = System.currentTimeMillis();
841 String url = "/api/generate";
842 int statusCode = -1;
843 try {
844 OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
845 ollamaRequestModel.setRaw(raw);
846 ollamaRequestModel.setThink(think);
847 OllamaAsyncResultStreamer ollamaAsyncResultStreamer =
849 getRequestBuilderDefault(new URI(this.host + url)),
850 ollamaRequestModel,
851 requestTimeoutSeconds);
852 ollamaAsyncResultStreamer.start();
853 statusCode = ollamaAsyncResultStreamer.getHttpStatusCode();
854 return ollamaAsyncResultStreamer;
855 } catch (Exception e) {
856 throw new OllamaException(e.getMessage(), e);
857 } finally {
859 url, model, raw, think, true, null, null, startTime, statusCode, null);
860 }
861 }
862
// Chat entry point: sends the request, then keeps invoking requested tools and re-sending the
// conversation until the model asks for no more tools or maxChatToolCallRetries is reached.
// NOTE(review): this listing lacks the method's declaration line (the member index gives
// "OllamaChatResult chat(OllamaChatRequest request, OllamaChatTokenHandler tokenHandler)")
// and two lines inside the ".add(" call below; restore them from the real source file.
876 throws OllamaException {
877 try {
878 OllamaChatEndpointCaller requestCaller =
879 new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
880 OllamaChatResult result;
881
882 // only add tools if tools flag is set
883 if (request.isUseTools()) {
884 // add all registered tools to request
885 request.getTools().addAll(toolRegistry.getRegisteredTools());
886 }
887
// A token handler switches the request to streaming mode.
888 if (tokenHandler != null) {
889 request.setStream(true);
890 result = requestCaller.call(request, tokenHandler);
891 } else {
892 result = requestCaller.callSync(request);
893 }
894
895 // check if toolCallIsWanted
896 List<OllamaChatToolCalls> toolCalls =
897 result.getResponseModel().getMessage().getToolCalls();
898 int toolCallTries = 0;
// Each pass runs every requested tool, appends its result as a message, and asks again.
899 while (toolCalls != null
900 && !toolCalls.isEmpty()
901 && toolCallTries < maxChatToolCallRetries) {
902 for (OllamaChatToolCalls toolCall : toolCalls) {
903 String toolName = toolCall.getFunction().getName();
// Tools are matched by spec name; a requested name with no match is silently skipped.
904 for (Tools.Tool t : request.getTools()) {
905 if (t.getToolSpec().getName().equals(toolName)) {
906 ToolFunction toolFunction = t.getToolFunction();
907 if (toolFunction == null) {
908 throw new ToolInvocationException(
909 "Tool function not found: " + toolName);
910 }
911 LOG.debug(
912 "Invoking tool {} with arguments: {}",
913 toolCall.getFunction().getName(),
914 toolCall.getFunction().getArguments());
915 Map<String, Object> arguments = toolCall.getFunction().getArguments();
916 Object res = toolFunction.apply(arguments);
// Only the argument names (not their values) are echoed in the tool-result message.
917 String argumentKeys =
918 arguments.keySet().stream()
919 .map(Object::toString)
920 .collect(Collectors.joining(", "));
921 request.getMessages()
922 .add(
// NOTE(review): two lines are missing here (presumably the message constructor and role).
925 "[TOOL_RESULTS] "
926 + toolName
927 + "("
928 + argumentKeys
929 + "): "
930 + res
931 + " [/TOOL_RESULTS]"));
932 }
933 }
934 }
935 if (tokenHandler != null) {
936 result = requestCaller.call(request, tokenHandler);
937 } else {
938 result = requestCaller.callSync(request);
939 }
940 toolCalls = result.getResponseModel().getMessage().getToolCalls();
941 toolCallTries++;
942 }
943 return result;
944 } catch (InterruptedException e) {
// Restore the interrupt flag before translating the exception.
945 Thread.currentThread().interrupt();
946 throw new OllamaException("Thread was interrupted", e);
947 } catch (Exception e) {
948 throw new OllamaException(e.getMessage(), e);
949 }
950 }
951
957 public void registerTool(Tools.Tool tool) {
958 toolRegistry.addTool(tool);
959 LOG.debug("Registered tool: {}", tool.getToolSpec().getName());
960 }
961
968 public void registerTools(List<Tools.Tool> tools) {
969 toolRegistry.addTools(tools);
970 }
971
973 return toolRegistry.getRegisteredTools();
974 }
975
980 public void deregisterTools() {
981 toolRegistry.clear();
982 LOG.debug("All tools have been deregistered.");
983 }
984
// Registers tools for every provider class named by the caller's @OllamaToolService annotation.
// NOTE(review): the method's declaration line is absent from this listing; restore it from
// the real source file.
994 try {
995 Class<?> callerClass = null;
996 try {
// Stack frame [2] is taken to be the class that invoked this method.
997 callerClass =
998 Class.forName(Thread.currentThread().getStackTrace()[2].getClassName());
999 } catch (ClassNotFoundException e) {
1000 throw new OllamaException(e.getMessage(), e);
1001 }
1002
1003 OllamaToolService ollamaToolServiceAnnotation =
1004 callerClass.getDeclaredAnnotation(OllamaToolService.class);
1005 if (ollamaToolServiceAnnotation == null) {
1006 throw new IllegalStateException(
1007 callerClass + " is not annotated as " + OllamaToolService.class);
1008 }
1009
// Each provider is instantiated through its no-argument constructor and scanned for tools.
1010 Class<?>[] providers = ollamaToolServiceAnnotation.providers();
1011 for (Class<?> provider : providers) {
1012 registerAnnotatedTools(provider.getDeclaredConstructor().newInstance());
1013 }
1014 } catch (InstantiationException
1015 | NoSuchMethodException
1016 | IllegalAccessException
1017 | InvocationTargetException e) {
// NOTE(review): the cause is dropped here, unlike every other wrap in this class; consider
// passing e as the second constructor argument.
1018 throw new OllamaException(e.getMessage());
1019 }
1020 }
1021
1031 public void registerAnnotatedTools(Object object) {
1032 Class<?> objectClass = object.getClass();
1033 Method[] methods = objectClass.getMethods();
1034 for (Method m : methods) {
1035 ToolSpec toolSpec = m.getDeclaredAnnotation(ToolSpec.class);
1036 if (toolSpec == null) {
1037 continue;
1038 }
1039 String operationName = !toolSpec.name().isBlank() ? toolSpec.name() : m.getName();
1040 String operationDesc = !toolSpec.desc().isBlank() ? toolSpec.desc() : operationName;
1041
1042 final Map<String, Tools.Property> params = new HashMap<String, Tools.Property>() {};
1043 LinkedHashMap<String, String> methodParams = new LinkedHashMap<>();
1044 for (Parameter parameter : m.getParameters()) {
1045 final ToolProperty toolPropertyAnn =
1046 parameter.getDeclaredAnnotation(ToolProperty.class);
1047 String propType = parameter.getType().getTypeName();
1048 if (toolPropertyAnn == null) {
1049 methodParams.put(parameter.getName(), null);
1050 continue;
1051 }
1052 String propName =
1053 !toolPropertyAnn.name().isBlank()
1054 ? toolPropertyAnn.name()
1055 : parameter.getName();
1056 methodParams.put(propName, propType);
1057 params.put(
1058 propName,
1059 Tools.Property.builder()
1060 .type(propType)
1061 .description(toolPropertyAnn.desc())
1062 .required(toolPropertyAnn.required())
1063 .build());
1064 }
1065 Tools.ToolSpec toolSpecification =
1066 Tools.ToolSpec.builder()
1067 .name(operationName)
1068 .description(operationDesc)
1069 .parameters(Tools.Parameters.of(params))
1070 .build();
1071 ReflectionalToolFunction reflectionalToolFunction =
1072 new ReflectionalToolFunction(object, m, methodParams);
1073 toolRegistry.addTool(
1074 Tools.Tool.builder()
1075 .toolFunction(reflectionalToolFunction)
1076 .toolSpec(toolSpecification)
1077 .build());
1078 }
1079 }
1080
1087 public OllamaChatMessageRole addCustomRole(String roleName) {
1088 return OllamaChatMessageRole.newCustomRole(roleName);
1089 }
1090
// Returns the chat roles known to the library.
// NOTE(review): the body line of this method is absent from this listing; restore it from
// the real source file.
1096 public List<OllamaChatMessageRole> listRoles() {
1098 }
1099
1107 public OllamaChatMessageRole getRole(String roleName) throws RoleNotFoundException {
1108 return OllamaChatMessageRole.getRole(roleName);
1109 }
1110
1111 // technical private methods //
1112
1120 private static String encodeFileToBase64(File file) throws IOException {
1121 return Base64.getEncoder().encodeToString(Files.readAllBytes(file.toPath()));
1122 }
1123
1130 private static String encodeByteArrayToBase64(byte[] bytes) {
1131 return Base64.getEncoder().encodeToString(bytes);
1132 }
1133
// Sends a generate request through OllamaGenerateEndpointCaller: streaming when a response
// handler is supplied, a plain synchronous call otherwise. Failures are wrapped in
// OllamaException; the interrupt flag is restored on InterruptedException.
1145 private OllamaResult generateSyncForOllamaRequestModel(
1146 OllamaGenerateRequest ollamaRequestModel,
1147 OllamaGenerateTokenHandler thinkingStreamHandler,
1148 OllamaGenerateTokenHandler responseStreamHandler)
1149 throws OllamaException {
1150 long startTime = System.currentTimeMillis();
1151 int statusCode = -1;
1152 Object out = null;
1153 try {
1154 OllamaGenerateEndpointCaller requestCaller =
1155 new OllamaGenerateEndpointCaller(host, auth, requestTimeoutSeconds);
1156 OllamaResult result;
// Streaming is decided by the response handler alone; a thinking handler by itself does not
// enable it.
1157 if (responseStreamHandler != null) {
1158 ollamaRequestModel.setStream(true);
1159 result =
1160 requestCaller.call(
1161 ollamaRequestModel, thinkingStreamHandler, responseStreamHandler);
1162 } else {
1163 result = requestCaller.callSync(ollamaRequestModel);
1164 }
1165 statusCode = result.getHttpStatusCode();
1166 out = result;
1167 return result;
1168 } catch (InterruptedException e) {
1169 Thread.currentThread().interrupt();
1170 throw new OllamaException("Thread was interrupted", e);
1171 } catch (Exception e) {
1172 throw new OllamaException(e.getMessage(), e);
1173 } finally {
// NOTE(review): two lines are missing here (presumably the MetricsRecorder.record( opener
// and its endpoint argument); restore them from the real source file.
1176 ollamaRequestModel.getModel(),
1177 ollamaRequestModel.isRaw(),
1178 ollamaRequestModel.isThink(),
1179 ollamaRequestModel.isStream(),
1180 ollamaRequestModel.getOptions(),
1181 ollamaRequestModel.getFormat(),
1182 startTime,
1183 statusCode,
1184 out);
1185 }
1186 }
1187
1194 private HttpRequest.Builder getRequestBuilderDefault(URI uri) {
1195 HttpRequest.Builder requestBuilder =
1196 HttpRequest.newBuilder(uri)
1197 .header(
1198 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
1199 Constants.HttpConstants.APPLICATION_JSON)
1200 .timeout(Duration.ofSeconds(requestTimeoutSeconds));
1201 if (isAuthSet()) {
1202 requestBuilder.header("Authorization", auth.getAuthHeaderValue());
1203 }
1204 return requestBuilder;
1205 }
1206
1212 private boolean isAuthSet() {
1213 return auth != null;
1214 }
1215}
void setBasicAuth(String username, String password)
Definition Ollama.java:138
OllamaChatMessageRole getRole(String roleName)
Definition Ollama.java:1107
OllamaEmbedResult embed(OllamaEmbedRequest modelRequest)
Definition Ollama.java:726
List< Tools.Tool > getRegisteredTools()
Definition Ollama.java:972
OllamaResult generate(OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver)
Definition Ollama.java:770
void unloadModel(String modelName)
Definition Ollama.java:666
void registerTool(Tools.Tool tool)
Definition Ollama.java:957
void pullModel(String modelName)
Definition Ollama.java:461
OllamaChatResult chat(OllamaChatRequest request, OllamaChatTokenHandler tokenHandler)
Definition Ollama.java:875
List< Model > listModels()
Definition Ollama.java:245
List< OllamaChatMessageRole > listRoles()
Definition Ollama.java:1096
OllamaChatMessageRole addCustomRole(String roleName)
Definition Ollama.java:1087
OllamaAsyncResultStreamer generateAsync(String model, String prompt, boolean raw, boolean think)
Definition Ollama.java:838
void setBearerAuth(String bearerToken)
Definition Ollama.java:147
ModelProcessesResult ps()
Definition Ollama.java:196
void createModel(CustomModelRequest customModelRequest)
Definition Ollama.java:548
void registerAnnotatedTools(Object object)
Definition Ollama.java:1031
void registerTools(List< Tools.Tool > tools)
Definition Ollama.java:968
void deleteModel(String modelName, boolean ignoreIfNotPresent)
Definition Ollama.java:612
ModelDetail getModelDetails(String modelName)
Definition Ollama.java:503
static void record(String endpoint, String model, boolean raw, boolean thinking, boolean streaming, Map< String, Object > options, Object format, long startTime, int responseHttpStatus, Object response)
static OllamaChatMessageRole newCustomRole(String roleName)
static OllamaChatMessageRole getRole(String roleName)
OllamaChatResult call(OllamaChatRequest body, OllamaChatTokenHandler tokenHandler)
OllamaResult call(OllamaRequestBody body, OllamaGenerateTokenHandler thinkingStreamHandler, OllamaGenerateTokenHandler responseStreamHandler)
void addTools(List< Tools.Tool > tools)
static ObjectMapper getObjectMapper()
Definition Utils.java:32
Object apply(Map< String, Object > arguments)